@@ -0,0 +1,21 @@
MIT License | |||
Copyright (c) 2024 EzraZephyr | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
in the Software without restriction, including without limitation the rights | |||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
copies of the Software, and to permit persons to whom the Software is | |||
furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. |
@@ -0,0 +1,108 @@
import cv2 | |||
import tkinter as tk | |||
from tkinter import filedialog, messagebox | |||
from video_recognition import upload_and_process_video, show_frame
from process_images import HandGestureProcessor | |||
current_mode = None | |||
current_cap = None | |||
# To track the current mode and camera resources | |||
def create_gui(): | |||
root = tk.Tk() | |||
root.title("Gesture Recognition") | |||
root.geometry("800x600") | |||
canvas = tk.Canvas(root, width=640, height=480) | |||
canvas.pack() | |||
# Create a canvas to display video content | |||
camera_button = tk.Button( | |||
root, | |||
text="Use Camera for Real-time Recognition", | |||
command=lambda: switch_to_camera(canvas) | |||
) | |||
camera_button.pack(pady=10) | |||
# Button to start real-time recognition using the camera | |||
video_button = tk.Button( | |||
root, | |||
text="Upload Video File for Processing", | |||
command=lambda: select_and_process_video(canvas, root) | |||
) | |||
video_button.pack(pady=10) | |||
# Button to upload and process video files | |||
root.mainloop() | |||
def switch_to_camera(canvas): | |||
global current_mode, current_cap | |||
stop_current_operation() | |||
# Stop the current operation and release the camera | |||
current_mode = "camera" | |||
canvas.delete("all") | |||
# Set the current mode to camera and clear the Canvas | |||
current_cap = cv2.VideoCapture(0) | |||
if not current_cap.isOpened(): | |||
messagebox.showerror("Error", "Cannot open camera") | |||
current_mode = None | |||
return | |||
# Start the camera | |||
start_camera(canvas, current_cap) | |||
# Pass the canvas and current_cap to start the camera | |||
def select_and_process_video(canvas, root): | |||
global current_mode, current_cap | |||
stop_current_operation() | |||
current_mode = "video" | |||
canvas.delete("all") | |||
video_path = filedialog.askopenfilename( | |||
title="Select a Video File", | |||
filetypes=(("MP4 files", "*.mp4"), ("AVI files", "*.avi"), ("All files", "*.*")) | |||
) | |||
# Select a video file | |||
if video_path: | |||
cap = cv2.VideoCapture(video_path) | |||
if not cap.isOpened(): | |||
messagebox.showerror("Error", "Cannot open video file") | |||
return | |||
# Get video width and height, and adjust Canvas size | |||
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) | |||
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) | |||
cap.release() | |||
canvas.config(width=frame_width, height=frame_height) | |||
root.geometry(f"{frame_width + 160}x{frame_height + 200}") # Adjust window size | |||
# Get video dimensions and dynamically adjust the canvas size | |||
error_message = upload_and_process_video(canvas, video_path) | |||
if error_message: | |||
messagebox.showerror("Error", error_message) | |||
# Upload and process the video file | |||
def stop_current_operation(): | |||
global current_cap | |||
if current_cap and current_cap.isOpened(): | |||
current_cap.release() | |||
cv2.destroyAllWindows() | |||
current_cap = None | |||
# Stop the current operation, release camera resources, and close all windows | |||
def start_camera(canvas, cap): | |||
if not cap.isOpened(): | |||
return "Cannot open camera" | |||
gesture_processor = HandGestureProcessor() | |||
show_frame(canvas, cap, gesture_processor) | |||
# Start the camera for real-time gesture recognition | |||
if __name__ == "__main__": | |||
create_gui() |
@@ -0,0 +1,35 @@
import cv2 | |||
class FingerDrawer: | |||
@staticmethod | |||
def draw_finger_points(image, hand_21, temp_handness, width, height): | |||
cz0 = hand_21.landmark[0].z | |||
index_finger_tip_str = '' | |||
for i in range(21): | |||
cx = int(hand_21.landmark[i].x * width) | |||
cy = int(hand_21.landmark[i].y * height) | |||
cz = hand_21.landmark[i].z | |||
depth_z = cz0 - cz | |||
radius = max(int(6 * (1 + depth_z * 5)), 0) | |||
# Adjust the radius of the circle based on depth | |||
if i == 0: | |||
image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1) | |||
elif i == 8: | |||
image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1) | |||
index_finger_tip_str += f'{temp_handness}:{depth_z:.2f}, ' | |||
elif i in [1, 5, 9, 13, 17]: | |||
image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1) | |||
elif i in [2, 6, 10, 14, 18]: | |||
image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1) | |||
elif i in [3, 7, 11, 15, 19]: | |||
image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1) | |||
elif i in [4, 12, 16, 20]: | |||
image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1) | |||
# Draw circles of different colors based on each group of joints and adjust according to the | |||
# depth relative to the wrist | |||
return image, index_finger_tip_str |
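A quick worked example of the depth-to-radius mapping used above (illustrative values, not from the source): depth_z = cz0 - cz is positive when a landmark sits closer to the camera than the wrist, so nearer points are drawn larger.

# radius = max(int(6 * (1 + depth_z * 5)), 0)
for depth_z in (-0.10, 0.00, 0.10):
    radius = max(int(6 * (1 + depth_z * 5)), 0)
    print(f'depth_z={depth_z:+.2f} -> radius={radius}')
# depth_z=-0.10 -> radius=3
# depth_z=+0.00 -> radius=6
# depth_z=+0.10 -> radius=9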
@@ -0,0 +1,43 @@
from collections import deque | |||
class HandState: | |||
def __init__(self): | |||
self.gesture_locked = {'Left': False, 'Right': False} | |||
self.gesture_start_time = {'Left': 0, 'Right': 0} | |||
self.buffer_start_time = {'Left': 0, 'Right': 0} | |||
self.start_drag_time = {'Left': 0, 'Right': 0} | |||
self.dragging = {'Left': False, 'Right': False} | |||
self.drag_point = {'Left': (0, 0), 'Right': (0, 0)} | |||
self.buffer_duration = {'Left': 0.25, 'Right': 0.25} | |||
self.is_index_finger_up = {'Left': False, 'Right': False} | |||
self.index_finger_second = {'Left': 0, 'Right': 0} | |||
self.index_finger_tip = {'Left': 0, 'Right': 0} | |||
self.trajectory = {'Left': [], 'Right': []} | |||
self.square_queue = deque() | |||
self.wait_time = 1.5 | |||
self.kalman_wait_time = 0.5 | |||
self.wait_box = 2 | |||
self.rect_draw_time = {'Left': 0, 'Right': 0} | |||
self.last_drawn_box = {'Left': None, 'Right': None} | |||
def clear_hand_states(self, detected_hand='Both'): | |||
        hands_to_clear = {'Left', 'Right'}
        if detected_hand != 'Both':
            hands_to_clear -= {detected_hand}
for h in hands_to_clear: | |||
self.gesture_locked[h] = False | |||
self.gesture_start_time[h] = 0 | |||
self.buffer_start_time[h] = 0 | |||
self.dragging[h] = False | |||
self.drag_point[h] = (0, 0) | |||
self.buffer_duration[h] = 0.25 | |||
self.is_index_finger_up[h] = False | |||
self.trajectory[h].clear() | |||
self.start_drag_time[h] = 0 | |||
self.rect_draw_time[h] = 0 | |||
self.last_drawn_box[h] = None | |||
# Used to record information for the left and right hands separately to avoid data conflicts |
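A minimal sketch of the clearing semantics, assuming this class lives in gesture_data.py (the name the imports elsewhere use): detected_hand names the hand that was seen and is therefore kept, while the other hand's state is reset.

from gesture_data import HandState

state = HandState()
state.dragging['Left'] = True
state.dragging['Right'] = True
state.clear_hand_states('Left')   # 'Left' was detected, so only 'Right' is reset
print(state.dragging)             # {'Left': True, 'Right': False}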
@@ -0,0 +1,24 @@
import cv2 | |||
import time | |||
from hand_gesture import HandGestureHandler | |||
class HandGestureProcessor: | |||
def __init__(self): | |||
self.hand_handler = HandGestureHandler() | |||
    def process_image(self, image, is_video):
start_time = time.time() | |||
height, width = image.shape[:2] | |||
image = cv2.flip(image, 1) | |||
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) | |||
# Get image dimensions, flip, and convert color space | |||
        image = self.hand_handler.handle_hand_gestures(image, width, height, is_video)
spend_time = time.time() - start_time | |||
FPS = 1.0 / spend_time if spend_time > 0 else 0 | |||
image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2) | |||
# Calculate and display the frame rate | |||
return image |
@@ -0,0 +1,406 @@
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"id": "initial_id", | |||
"metadata": { | |||
"collapsed": true, | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:10:50.912839Z", | |||
"start_time": "2024-09-07T05:10:44.776680Z" | |||
} | |||
}, | |||
"source": [ | |||
"import cv2\n", | |||
"import time\n", | |||
"import mediapipe\n", | |||
"import numpy as np\n", | |||
"from collections import deque\n", | |||
"from filterpy.kalman import KalmanFilter" | |||
], | |||
"outputs": [], | |||
"execution_count": 1 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:10:50.928940Z", | |||
"start_time": "2024-09-07T05:10:50.913896Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"gesture_locked = {'Left':False,'Right':False}\n", | |||
"gesture_start_time = {'Left':0,'Right':0}\n", | |||
"buffer_start_time = {'Left':0,'Right':0}\n", | |||
"start_drag_time = {'Left':0,'Right':0}\n", | |||
"dragging = {'Left':False,'Right':False}\n", | |||
"drag_point = {'Left':(0, 0),'Right':(0, 0)}\n", | |||
"buffer_duration = {'Left':0.25,'Right':0.25}\n", | |||
"is_index_finger_up = {'Left':False,'Right':False}\n", | |||
"index_finger_second = {'Left':0,'Right':0}\n", | |||
"index_finger_tip = {'Left':0,'Right':0}\n", | |||
"trajectory = {'Left':[],'Right':[]}\n", | |||
"square_queue = deque()\n", | |||
"wait_time = 1.5\n", | |||
"kalman_wait_time = 0.5\n", | |||
"wait_box = 2\n", | |||
"rect_draw_time = {'Left':0,'Right':0}\n", | |||
"last_drawn_box = {'Left':None,'Right':None}\n", | |||
"elapsed_time = {'Left':0,'Right':0}" | |||
], | |||
"id": "40aada17ccd31fe", | |||
"outputs": [], | |||
"execution_count": 2 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:10:55.708038Z", | |||
"start_time": "2024-09-07T05:10:55.691926Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"def clear_hand_states(detected_hand='Both'):\n", | |||
" global gesture_locked, gesture_start_time, buffer_start_time, dragging, drag_point, buffer_duration, is_index_finger_up, trajectory, wait_time, kalman_wait_time, start_drag_time, rect_draw_time, last_drawn_box, wait_box, elapsed_time\n", | |||
"\n", | |||
" hands_to_clear = {'Left', 'Right'}\n", | |||
" if detected_hand == 'Both':\n", | |||
" hands_to_clear = hands_to_clear\n", | |||
" else:\n", | |||
" hands_to_clear -= {detected_hand}\n", | |||
" # Reverse check for left and right hands\n", | |||
"\n", | |||
" for h in hands_to_clear:\n", | |||
" gesture_locked[h] = False\n", | |||
" gesture_start_time[h] = 0\n", | |||
" buffer_start_time[h] = 0\n", | |||
" dragging[h] = False\n", | |||
" drag_point[h] = (0, 0)\n", | |||
" buffer_duration[h] = 0.25\n", | |||
" is_index_finger_up[h] = False\n", | |||
" trajectory[h].clear()\n", | |||
" start_drag_time[h] = 0\n", | |||
" rect_draw_time[h] = 0\n", | |||
" last_drawn_box[h] = None\n", | |||
" elapsed_time[h] = 0\n", | |||
" # Clear states for hands that are not detected" | |||
], | |||
"id": "2ee9323bb1c25cc0", | |||
"outputs": [], | |||
"execution_count": 3 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:10:56.547939Z", | |||
"start_time": "2024-09-07T05:10:56.532265Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"kalman_filters = {\n", | |||
" 'Left': KalmanFilter(dim_x=4, dim_z=2),\n", | |||
" 'Right': KalmanFilter(dim_x=4, dim_z=2)\n", | |||
"}\n", | |||
"\n", | |||
"for key in kalman_filters:\n", | |||
" kalman_filters[key].x = np.array([0., 0., 0., 0.])\n", | |||
" kalman_filters[key].F = np.array([[1, 0, 1, 0],\n", | |||
" [0, 1, 0, 1],\n", | |||
" [0, 0, 1, 0],\n", | |||
" [0, 0, 0, 1]])\n", | |||
" # State transition matrix\n", | |||
" kalman_filters[key].H = np.array([[1, 0, 0, 0],\n", | |||
" [0, 1, 0, 0]])\n", | |||
" # Observation matrix\n", | |||
" kalman_filters[key].P *= 1000.\n", | |||
" kalman_filters[key].R = 3\n", | |||
" kalman_filters[key].Q = np.eye(4) * 0.01\n", | |||
"\n", | |||
"def kalman_filter_point(hand_label, x, y):\n", | |||
" kf = kalman_filters[hand_label]\n", | |||
" kf.predict()\n", | |||
" kf.update([x, y])\n", | |||
" # Update state\n", | |||
" return (kf.x[0], kf.x[1])\n", | |||
"\n", | |||
"def reset_kalman_filter(hand_label, x, y):\n", | |||
" kf = kalman_filters[hand_label]\n", | |||
" kf.x = np.array([x, y, 0., 0.])\n", | |||
" kf.P *= 1000.\n", | |||
" # Reset" | |||
], | |||
"id": "96cf431d2562e7d", | |||
"outputs": [], | |||
"execution_count": 4 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:10:57.253008Z", | |||
"start_time": "2024-09-07T05:10:57.231898Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"mp_hands = mediapipe.solutions.hands\n", | |||
"\n", | |||
"hands = mp_hands.Hands(\n", | |||
" static_image_mode=False,\n", | |||
" max_num_hands=2,\n", | |||
" # One hand is more stable\n", | |||
" min_detection_confidence=0.5,\n", | |||
" min_tracking_confidence=0.5\n", | |||
")\n", | |||
"\n", | |||
"mp_drawing = mediapipe.solutions.drawing_utils\n", | |||
"clear_hand_states()" | |||
], | |||
"id": "edc274b7ed495122", | |||
"outputs": [], | |||
"execution_count": 5 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:10:58.920644Z", | |||
"start_time": "2024-09-07T05:10:58.881367Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"def process_image(image):\n", | |||
"\n", | |||
" start_time = time.time()\n", | |||
" height, width = image.shape[:2]\n", | |||
" image = cv2.flip(image, 1)\n", | |||
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", | |||
" # Preprocess the input frame\n", | |||
"\n", | |||
" results = hands.process(image)\n", | |||
"\n", | |||
" if results.multi_hand_landmarks:\n", | |||
" # If hands are detected\n", | |||
"\n", | |||
" handness_str = ''\n", | |||
" index_finger_tip_str = ''\n", | |||
"\n", | |||
" if len(results.multi_hand_landmarks) == 1:\n", | |||
" clear_hand_states(detected_hand=results.multi_handedness[0].classification[0].label)\n", | |||
" # If only one hand is detected, clear the data of the other hand to avoid conflicts that could cause instability.\n", | |||
"\n", | |||
" for hand_idx in range(len(results.multi_hand_landmarks)):\n", | |||
"\n", | |||
" hand_21 = results.multi_hand_landmarks[hand_idx]\n", | |||
" mp_drawing.draw_landmarks(image, hand_21, mp_hands.HAND_CONNECTIONS)\n", | |||
"\n", | |||
" temp_handness = results.multi_handedness[hand_idx].classification[0].label\n", | |||
" handness_str += '{}:{}, '.format(hand_idx, temp_handness)\n", | |||
" is_index_finger_up[temp_handness] = False\n", | |||
" # Set to False first to prevent incorrect updates to raised when lowered\n", | |||
"\n", | |||
" cz0 = hand_21.landmark[0].z\n", | |||
" index_finger_second[temp_handness] = hand_21.landmark[7]\n", | |||
" index_finger_tip[temp_handness] = hand_21.landmark[8]\n", | |||
" # Index fingertip and first joint\n", | |||
"\n", | |||
" index_x, index_y = int(index_finger_tip[temp_handness].x * width), int(index_finger_tip[temp_handness].y * height)\n", | |||
"\n", | |||
" if all(index_finger_second[temp_handness].y < hand_21.landmark[i].y for i in range(21) if i not in [7, 8]) and index_finger_tip[temp_handness].y < index_finger_second[temp_handness].y:\n", | |||
" is_index_finger_up[temp_handness] = True\n", | |||
" # If the fingertip and second joint are higher than all other keypoints on the hand, consider it as a \"pointing\" gesture. \n", | |||
"\n", | |||
" if is_index_finger_up[temp_handness]:\n", | |||
" if not gesture_locked[temp_handness]:\n", | |||
" if gesture_start_time[temp_handness] == 0:\n", | |||
" gesture_start_time[temp_handness] = time.time()\n", | |||
" # Record the time when the index finger is raised\n", | |||
" elif time.time() - gesture_start_time[temp_handness] > wait_time:\n", | |||
" dragging[temp_handness] = True\n", | |||
" gesture_locked[temp_handness] = True\n", | |||
" drag_point[temp_handness] = (index_x, index_y)\n", | |||
" # If the index finger is raised for longer than the set wait time, it is considered a \"pointing\" gesture.\n", | |||
" buffer_start_time[temp_handness] = 0\n", | |||
" # Refresh the buffer time whenever the index finger is raised\n", | |||
" else:\n", | |||
" if buffer_start_time[temp_handness] == 0:\n", | |||
" buffer_start_time[temp_handness] = time.time()\n", | |||
" elif time.time() - buffer_start_time[temp_handness] > buffer_duration[temp_handness]:\n", | |||
" gesture_start_time[temp_handness] = 0\n", | |||
" gesture_locked[temp_handness] = False\n", | |||
" dragging[temp_handness] = False\n", | |||
" # If the buffer time exceeds the set limit, it indicates the end of the pointing gesture.\n", | |||
" # This prevents incorrect clearing of the pointing gesture due to recognition errors in a single frame.\n", | |||
"\n", | |||
" if dragging[temp_handness]:\n", | |||
"\n", | |||
" if start_drag_time[temp_handness] == 0:\n", | |||
" start_drag_time[temp_handness] = time.time()\n", | |||
" reset_kalman_filter(temp_handness, index_x, index_y)\n", | |||
" # Initialize the filter whenever a line is drawn\n", | |||
"\n", | |||
" smooth_x, smooth_y = kalman_filter_point(temp_handness, index_x, index_y)\n", | |||
" drag_point[temp_handness] = (index_x, index_y)\n", | |||
" index_finger_radius = max(int(10 * (1 + (cz0 - index_finger_tip[temp_handness].z) * 5)), 0)\n", | |||
" cv2.circle(image, drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)\n", | |||
" # Create a circle based on the depth distance from the wrist root\n", | |||
" # This is used to show that the pointing gesture has started\n", | |||
" # The corresponding depth points below are scaled directly\n", | |||
" drag_point_smooth = (smooth_x, smooth_y)\n", | |||
"\n", | |||
" if time.time() - start_drag_time[temp_handness] > kalman_wait_time:\n", | |||
" trajectory[temp_handness].append(drag_point_smooth)\n", | |||
" # The Kalman filter can be very unstable when initialized, with significant noise in the first few frames\n", | |||
" # Wait until the first few frames have run before adding the coordinates to the trajectory list.\n", | |||
" else:\n", | |||
" if len(trajectory[temp_handness]) > 4:\n", | |||
" contour = np.array(trajectory[temp_handness], dtype=np.int32)\n", | |||
" rect = cv2.minAreaRect(contour)\n", | |||
" box = cv2.boxPoints(rect)\n", | |||
" box = np.int0(box)\n", | |||
" rect_draw_time[temp_handness] = time.time()\n", | |||
" last_drawn_box[temp_handness] = box\n", | |||
" # If the pointing gesture ends and there are at least four points in the trajectory list,\n", | |||
" # Use the minimum bounding box to adjust the irregular drawing to a rectangle.\n", | |||
"\n", | |||
" start_drag_time[temp_handness] = 0\n", | |||
" trajectory[temp_handness].clear()\n", | |||
"\n", | |||
" for i in range(1, len(trajectory[temp_handness])):\n", | |||
"\n", | |||
" pt1 = (int(trajectory[temp_handness][i-1][0]), int(trajectory[temp_handness][i-1][1]))\n", | |||
" pt2 = (int(trajectory[temp_handness][i][0]), int(trajectory[temp_handness][i][1]))\n", | |||
" cv2.line(image, pt1, pt2, (0, 0, 255), 2)\n", | |||
" # Draw lines connecting trajectory points\n", | |||
"\n", | |||
" if last_drawn_box[temp_handness] is not None:\n", | |||
" elapsed_time[temp_handness] = time.time() - rect_draw_time[temp_handness]\n", | |||
"\n", | |||
" if elapsed_time[temp_handness] < wait_box:\n", | |||
" cv2.drawContours(image, [last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)\n", | |||
" # Keep the rectangle visible for a while, otherwise, it's too fast to observe.\n", | |||
"\n", | |||
" elif elapsed_time[temp_handness] >= wait_box - 0.1:\n", | |||
"\n", | |||
" box = last_drawn_box[temp_handness]\n", | |||
" x_min = max(0, min(box[:, 0]))\n", | |||
" y_min = max(0, min(box[:, 1]))\n", | |||
" x_max = min(image.shape[1], max(box[:, 0]))\n", | |||
" y_max = min(image.shape[0], max(box[:, 1]))\n", | |||
" cropped_image = image[y_min:y_max, x_min:x_max]\n", | |||
" filename = f\"../image/cropped_{temp_handness}_{int(time.time())}.jpg\"\n", | |||
" cv2.imwrite(filename, cropped_image)\n", | |||
" last_drawn_box[temp_handness] = None\n", | |||
" # The drawn image cannot be cropped immediately, as it might wrongly crop the hand into it.\n", | |||
" # Wait a while to give the hand time to move away before extracting the rectangle from this frame.\n", | |||
"\n", | |||
" for i in range(21):\n", | |||
"\n", | |||
" cx = int(hand_21.landmark[i].x * width)\n", | |||
" cy = int(hand_21.landmark[i].y * height)\n", | |||
" cz = hand_21.landmark[i].z\n", | |||
" depth_z = cz0 - cz\n", | |||
" radius = max(int(6 * (1 + depth_z * 5)), 0)\n", | |||
"\n", | |||
" if i == 0:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)\n", | |||
" if i == 8:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)\n", | |||
" index_finger_tip_str += '{}:{:.2f}, '.format(hand_idx, depth_z)\n", | |||
" if i in [1, 5, 9, 13, 17]:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)\n", | |||
" if i in [2, 6, 10, 14, 18]:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)\n", | |||
" if i in [3, 7, 11, 15, 19]:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)\n", | |||
" if i in [4, 12, 16, 20]:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)\n", | |||
" # Extract each keypoint, assign corresponding colors, and set depth based on the wrist root.\n", | |||
"\n", | |||
" scaler = 1\n", | |||
" image = cv2.putText(image, handness_str, (25 * scaler, 100 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (0, 0, 255), 2)\n", | |||
" image = cv2.putText(image, index_finger_tip_str, (25 * scaler, 150 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (0, 0, 255), 2,)\n", | |||
"\n", | |||
" spend_time = time.time() - start_time\n", | |||
" if spend_time > 0:\n", | |||
" FPS = 1.0 / spend_time\n", | |||
" else:\n", | |||
" FPS = 0\n", | |||
"\n", | |||
" image = cv2.putText(image, 'FPS ' + str(int(FPS)), (25 * scaler, 50 * scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25 * scaler, (0, 0, 255), 2,)\n", | |||
" # Display FPS, detected hands, and the depth value of the index fingertip relative to the wrist root.\n", | |||
"\n", | |||
" else:\n", | |||
" clear_hand_states()\n", | |||
" # If no hands are detected, clear all information.\n", | |||
"\n", | |||
" return image" | |||
], | |||
"id": "51ff809ecaf1f899", | |||
"outputs": [], | |||
"execution_count": 6 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:11:15.392765Z", | |||
"start_time": "2024-09-07T05:10:59.535594Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"cap = cv2.VideoCapture(1)\n", | |||
"cap.open(0)\n", | |||
"\n", | |||
"while cap.isOpened():\n", | |||
" success, frame = cap.read()\n", | |||
" if not success:\n", | |||
" print(\"Camera Error\")\n", | |||
" break\n", | |||
"\n", | |||
" frame = process_image(frame)\n", | |||
" cv2.imshow('Video', frame)\n", | |||
"\n", | |||
" if cv2.waitKey(1) & 0xFF == ord('q'):\n", | |||
" break\n", | |||
"\n", | |||
"cap.release()\n", | |||
"cv2.destroyAllWindows() " | |||
], | |||
"id": "b7ce23e80ed36041", | |||
"outputs": [], | |||
"execution_count": 7 | |||
}, | |||
{ | |||
"metadata": {}, | |||
"cell_type": "code", | |||
"outputs": [], | |||
"execution_count": null, | |||
"source": "", | |||
"id": "1102d2fc75310c6e" | |||
} | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 2 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython2", | |||
"version": "2.7.6" | |||
} | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 5 | |||
} |
@@ -0,0 +1,61 @@
import cv2 | |||
from model import HandTracker | |||
from index_finger import IndexFingerHandler | |||
from gesture_data import HandState | |||
from kalman_filter import KalmanHandler | |||
from utils_zh.finger_drawer import FingerDrawer | |||
class HandGestureHandler: | |||
def __init__(self): | |||
self.hand_state = HandState() | |||
self.kalman_handler = KalmanHandler() | |||
self.hand_tracker = HandTracker() | |||
self.index_handler = IndexFingerHandler(self.hand_state, self.kalman_handler) | |||
def handle_hand_gestures(self, image, width, height, is_video): | |||
results = self.hand_tracker.process(image) | |||
if results.multi_hand_landmarks: | |||
handness_str = '' | |||
index_finger_tip_str = '' | |||
if len(results.multi_hand_landmarks) == 1: | |||
detected_hand = results.multi_handedness[0].classification[0].label | |||
self.hand_state.clear_hand_states(detected_hand) | |||
# If only one hand is detected, clear the information of the other hand | |||
# to prevent data conflicts when the second hand appears. | |||
for hand_idx, hand_21 in enumerate(results.multi_hand_landmarks): | |||
self.hand_tracker.mp_drawing.draw_landmarks( | |||
image, hand_21, self.hand_tracker.mp_hands.HAND_CONNECTIONS | |||
) | |||
# Draw the connections of hand keypoints | |||
temp_handness = results.multi_handedness[hand_idx].classification[0].label | |||
handness_str += f'{hand_idx}:{temp_handness}, ' | |||
self.hand_state.is_index_finger_up[temp_handness] = False | |||
image = self.index_handler.handle_index_finger( | |||
image, hand_21, temp_handness, width, height | |||
) | |||
# Handle the index finger | |||
image, index_finger_tip_str = FingerDrawer.draw_finger_points( | |||
image, hand_21, temp_handness, width, height | |||
) | |||
if is_video: | |||
image = cv2.flip(image, 1) | |||
image = cv2.putText(image, handness_str, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2) | |||
image = cv2.putText(image, index_finger_tip_str, (25, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2) | |||
else: | |||
if is_video: | |||
image = cv2.flip(image, 1) | |||
# If it's input video from a rear-facing camera, flip the image before processing | |||
# to ensure correct left and right hand detection, and flip it back afterward | |||
# to prevent mirrored output errors. | |||
self.hand_state.clear_hand_states() | |||
# Clear hand states if no hands are detected | |||
return image |
@@ -0,0 +1,114 @@
import cv2 | |||
import time | |||
import numpy as np | |||
class IndexFingerHandler: | |||
def __init__(self, hand_state, kalman_handler): | |||
self.hand_state = hand_state | |||
self.kalman_handler = kalman_handler | |||
self.wait_time = 1.5 | |||
self.kalman_wait_time = 0.5 | |||
self.wait_box = 2 | |||
def handle_index_finger(self, image, hand_21, temp_handness, width, height): | |||
cz0 = hand_21.landmark[0].z | |||
self.hand_state.index_finger_second[temp_handness] = hand_21.landmark[7] | |||
self.hand_state.index_finger_tip[temp_handness] = hand_21.landmark[8] | |||
index_x = int(self.hand_state.index_finger_tip[temp_handness].x * width) | |||
index_y = int(self.hand_state.index_finger_tip[temp_handness].y * height) | |||
self.update_index_finger_state(hand_21, temp_handness, index_x, index_y) | |||
self.draw_index_finger_gesture(image, temp_handness, index_x, index_y, cz0) | |||
return image | |||
# Handle the index finger's state and gesture effect, and update the image | |||
def update_index_finger_state(self, hand_21, temp_handness, index_x, index_y): | |||
if all(self.hand_state.index_finger_second[temp_handness].y < hand_21.landmark[i].y | |||
for i in range(21) if i not in [7, 8]) and \ | |||
self.hand_state.index_finger_tip[temp_handness].y < self.hand_state.index_finger_second[temp_handness].y: | |||
self.hand_state.is_index_finger_up[temp_handness] = True | |||
# If both the index fingertip and first joint are above other keypoints, | |||
# consider the index finger as raised. | |||
if self.hand_state.is_index_finger_up[temp_handness]: | |||
if not self.hand_state.gesture_locked[temp_handness]: | |||
if self.hand_state.gesture_start_time[temp_handness] == 0: | |||
self.hand_state.gesture_start_time[temp_handness] = time.time() | |||
elif time.time() - self.hand_state.gesture_start_time[temp_handness] > self.wait_time: | |||
self.hand_state.dragging[temp_handness] = True | |||
self.hand_state.gesture_locked[temp_handness] = True | |||
self.hand_state.drag_point[temp_handness] = (index_x, index_y) | |||
# If the pointing gesture has lasted longer than the wait time, confirm the pointing action. | |||
self.hand_state.buffer_start_time[temp_handness] = 0 | |||
# Buffer time to prevent immediate interruption due to recognition errors. | |||
else: | |||
if self.hand_state.buffer_start_time[temp_handness] == 0: | |||
self.hand_state.buffer_start_time[temp_handness] = time.time() | |||
elif time.time() - self.hand_state.buffer_start_time[temp_handness] > self.hand_state.buffer_duration[temp_handness]: | |||
self.hand_state.gesture_start_time[temp_handness] = 0 | |||
self.hand_state.gesture_locked[temp_handness] = False | |||
self.hand_state.dragging[temp_handness] = False | |||
# If the interruption time of the pointing gesture exceeds the set buffer duration, formally terminate. | |||
def draw_index_finger_gesture(self, image, temp_handness, index_x, index_y, cz0): | |||
if self.hand_state.dragging[temp_handness]: | |||
if self.hand_state.start_drag_time[temp_handness] == 0: | |||
self.hand_state.start_drag_time[temp_handness] = time.time() | |||
self.kalman_handler.reset_kalman_filter(temp_handness, index_x, index_y) | |||
# If it's the first operation, record the time and reset the Kalman filter. | |||
smooth_x, smooth_y = self.kalman_handler.kalman_filter_point(temp_handness, index_x, index_y) | |||
# Use the Kalman filter to smooth the generated trajectory, reducing noise and jitter. | |||
self.hand_state.drag_point[temp_handness] = (index_x, index_y) | |||
index_finger_radius = max(int(10 * (1 + (cz0 - self.hand_state.index_finger_tip[temp_handness].z) * 5)), 0) | |||
cv2.circle(image, self.hand_state.drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1) | |||
# Adjust the circle size based on the distance from the wrist root, slightly larger than FingerDrawer for visibility during gesture lock. | |||
drag_point_smooth = (smooth_x, smooth_y) | |||
if time.time() - self.hand_state.start_drag_time[temp_handness] > self.kalman_wait_time: | |||
self.hand_state.trajectory[temp_handness].append(drag_point_smooth) | |||
# Wait for the Kalman filter to stabilize data before adding coordinates to the trajectory. | |||
else: | |||
if len(self.hand_state.trajectory[temp_handness]) > 4: | |||
contour = np.array(self.hand_state.trajectory[temp_handness], dtype=np.int32) | |||
rect = cv2.minAreaRect(contour) | |||
box = cv2.boxPoints(rect) | |||
                box = box.astype(np.intp)  # np.int0 was removed in NumPy 2.0
# Calculate the minimum enclosing rectangle when the drag points exceed 4. | |||
self.hand_state.rect_draw_time[temp_handness] = time.time() | |||
self.hand_state.last_drawn_box[temp_handness] = box | |||
self.hand_state.start_drag_time[temp_handness] = 0 | |||
self.hand_state.trajectory[temp_handness].clear() | |||
# Reset and clear | |||
for i in range(1, len(self.hand_state.trajectory[temp_handness])): | |||
pt1 = (int(self.hand_state.trajectory[temp_handness][i-1][0]), int(self.hand_state.trajectory[temp_handness][i-1][1])) | |||
pt2 = (int(self.hand_state.trajectory[temp_handness][i][0]), int(self.hand_state.trajectory[temp_handness][i][1])) | |||
cv2.line(image, pt1, pt2, (0, 0, 255), 2) | |||
# Draw the drag path | |||
if self.hand_state.last_drawn_box[temp_handness] is not None: | |||
elapsed_time = time.time() - self.hand_state.rect_draw_time[temp_handness] | |||
if elapsed_time < self.wait_box: | |||
cv2.drawContours(image, [self.hand_state.last_drawn_box[temp_handness]], 0, (0, 255, 0), 2) | |||
# Keep the bounding box visible for a set period for easier observation. | |||
elif elapsed_time >= self.wait_box - 0.1: | |||
box = self.hand_state.last_drawn_box[temp_handness] | |||
x_min = max(0, min(box[:, 0])) | |||
y_min = max(0, min(box[:, 1])) | |||
x_max = min(image.shape[1], max(box[:, 0])) | |||
y_max = min(image.shape[0], max(box[:, 1])) | |||
cropped_image = image[y_min:y_max, x_min:x_max] | |||
filename = f"../image/cropped_{temp_handness}_{int(time.time())}.jpg" | |||
cv2.imwrite(filename, cropped_image) | |||
self.hand_state.last_drawn_box[temp_handness] = None | |||
# To avoid accidentally cropping the hand into the bounding box, | |||
# perform the crop in the last 0.1 seconds before the box disappears, | |||
# giving enough time for the hand to move away. |
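The trajectory-to-rectangle step above can be hard to picture, so here is a self-contained sketch of the same minAreaRect/boxPoints/crop sequence on synthetic points (values are illustrative only):

import cv2
import numpy as np

trajectory = [(100, 120), (180, 110), (200, 200), (110, 210), (150, 160)]
contour = np.array(trajectory, dtype=np.int32)
rect = cv2.minAreaRect(contour)            # ((cx, cy), (w, h), angle)
box = cv2.boxPoints(rect).astype(np.intp)  # four corner points of the box

canvas = np.zeros((300, 300, 3), dtype=np.uint8)
cv2.drawContours(canvas, [box], 0, (0, 255, 0), 2)
x_min, y_min = max(0, box[:, 0].min()), max(0, box[:, 1].min())
x_max = min(canvas.shape[1], box[:, 0].max())
y_max = min(canvas.shape[0], box[:, 1].max())
cropped = canvas[y_min:y_max, x_min:x_max]  # axis-aligned crop around the box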
@@ -0,0 +1,36 @@
import numpy as np | |||
from filterpy.kalman import KalmanFilter | |||
class KalmanHandler: | |||
def __init__(self): | |||
self.kalman_filters = { | |||
'Left': KalmanFilter(dim_x=4, dim_z=2), | |||
'Right': KalmanFilter(dim_x=4, dim_z=2) | |||
} | |||
for key in self.kalman_filters: | |||
self.kalman_filters[key].x = np.array([0., 0., 0., 0.]) | |||
self.kalman_filters[key].F = np.array([[1, 0, 1, 0], | |||
[0, 1, 0, 1], | |||
[0, 0, 1, 0], | |||
[0, 0, 0, 1]]) | |||
self.kalman_filters[key].H = np.array([[1, 0, 0, 0], | |||
[0, 1, 0, 0]]) | |||
self.kalman_filters[key].P *= 1000. | |||
self.kalman_filters[key].R = 3 | |||
self.kalman_filters[key].Q = np.eye(4) * 0.01 | |||
# These parameters were obtained through multiple tests and have shown stable performance. | |||
def kalman_filter_point(self, hand_label, x, y): | |||
kf = self.kalman_filters[hand_label] | |||
kf.predict() | |||
kf.update([x, y]) | |||
# Update state | |||
return (kf.x[0], kf.x[1]) | |||
def reset_kalman_filter(self, hand_label, x, y): | |||
kf = self.kalman_filters[hand_label] | |||
kf.x = np.array([x, y, 0., 0.]) | |||
kf.P *= 1000. | |||
# Reset |
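A minimal usage sketch, assuming this module is saved as kalman_filter.py (the name hand_gesture.py imports it under). Feeding a few noisy measurements shows the filter settling near the true point:

import random
from kalman_filter import KalmanHandler

handler = KalmanHandler()
handler.reset_kalman_filter('Right', 320, 240)
for _ in range(20):
    # Simulated fingertip jitter around (320, 240)
    x = 320 + random.uniform(-8, 8)
    y = 240 + random.uniform(-8, 8)
    smooth_x, smooth_y = handler.kalman_filter_point('Right', x, y)
print(f'smoothed point: ({smooth_x:.1f}, {smooth_y:.1f})')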
@@ -0,0 +1,17 @@
import mediapipe as mp | |||
class HandTracker: | |||
def __init__(self): | |||
self.mp_hands = mp.solutions.hands | |||
self.hands = self.mp_hands.Hands( | |||
static_image_mode=False, | |||
max_num_hands=1, | |||
# Setting it to one would be more stable | |||
min_detection_confidence=0.5, | |||
min_tracking_confidence=0.5 | |||
) | |||
self.mp_drawing = mp.solutions.drawing_utils | |||
def process(self, image): | |||
results = self.hands.process(image) | |||
return results |
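A short sketch of driving HandTracker with a single webcam frame, assuming the class is saved as model.py (the name used by the imports above). MediaPipe expects RGB input, which is why every caller converts from OpenCV's BGR first:

import cv2
from model import HandTracker

tracker = HandTracker()
cap = cv2.VideoCapture(0)
success, frame = cap.read()
cap.release()
if success:
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # MediaPipe wants RGB, not BGR
    results = tracker.process(rgb)
    if results.multi_hand_landmarks:
        label = results.multi_handedness[0].classification[0].label
        print(f'detected a {label} hand')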
@@ -0,0 +1,24 @@
import cv2 | |||
import time | |||
from hand_gesture import HandGestureHandler | |||
class HandGestureProcessor: | |||
def __init__(self): | |||
self.hand_handler = HandGestureHandler() | |||
def process_image(self, image, is_video): | |||
start_time = time.time() | |||
height, width = image.shape[:2] | |||
image = cv2.flip(image, 1) | |||
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) | |||
# Preprocess the incoming video frame | |||
image = self.hand_handler.handle_hand_gestures(image, width, height, is_video) | |||
spend_time = time.time() - start_time | |||
FPS = 1.0 / spend_time if spend_time > 0 else 0 | |||
image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2) | |||
# Calculate and display the frame rate | |||
return image |
@@ -0,0 +1,65 @@
import cv2 | |||
from process_images import HandGestureProcessor | |||
from tkinter import messagebox | |||
from PIL import Image, ImageTk | |||
def start_camera(canvas): | |||
cap = cv2.VideoCapture(0) | |||
if not cap.isOpened(): | |||
return "Cannot open camera" | |||
gesture_processor = HandGestureProcessor() | |||
show_frame(canvas, cap, gesture_processor) | |||
def show_frame(canvas, cap, gesture_processor): | |||
success, frame = cap.read() | |||
if success: | |||
        processed_frame = gesture_processor.process_image(frame, False)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        # Process the frame, convert it to RGB, and display it on the canvas
        canvas.after(10, show_frame, canvas, cap, gesture_processor)
        # Schedule the next call so every subsequent frame is processed and displayed
else: | |||
cap.release() | |||
cv2.destroyAllWindows() | |||
def upload_and_process_video(canvas, video_path): | |||
cap = cv2.VideoCapture(video_path) | |||
if not cap.isOpened(): | |||
return "Cannot open video file" | |||
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) | |||
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) | |||
fps = cap.get(cv2.CAP_PROP_FPS) | |||
    # Read the video's width, height, and frame rate
    output_filename = "../video/processed_output.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))
    # Set the output video path and codec ('mp4v' matches the .mp4 container; 'XVID' would need an .avi file)
gesture_processor = HandGestureProcessor() | |||
process_video_frame(canvas, cap, gesture_processor, out) | |||
def process_video_frame(canvas, cap, gesture_processor, out): | |||
success, frame = cap.read() | |||
if success: | |||
        processed_frame = gesture_processor.process_image(frame, True)
out.write(processed_frame) | |||
img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB) | |||
img = Image.fromarray(img) | |||
imgtk = ImageTk.PhotoImage(image=img) | |||
canvas.imgtk = imgtk | |||
canvas.create_image(0, 0, anchor="nw", image=imgtk) | |||
canvas.after(10, process_video_frame, canvas, cap, gesture_processor, out) | |||
else: | |||
cap.release() | |||
out.release() | |||
cv2.destroyAllWindows() | |||
        messagebox.showinfo("Info", "Processed video saved as processed_output.mp4")
        print("Processed video saved as processed_output.mp4")
@ -0,0 +1,112 @@ | |||
import cv2 | |||
import tkinter as tk | |||
from tkinter import filedialog, messagebox | |||
from video_recognition import start_camera, upload_and_process_video, show_frame | |||
from process_images import HandGestureProcessor | |||
current_mode = None | |||
current_cap = None | |||
# 用于追踪当前模式和摄像头资源 | |||
def create_gui(): | |||
root = tk.Tk() | |||
root.title("Gesture Recognition") | |||
root.geometry("800x600") | |||
canvas = tk.Canvas(root, width=640, height=480) | |||
canvas.pack() | |||
# 创建显示视频内容的画布 | |||
camera_button = tk.Button( | |||
root, | |||
text="Use Camera for Real-time Recognition", | |||
command=lambda: switch_to_camera(canvas) | |||
) | |||
camera_button.pack(pady=10) | |||
# 启动摄像头实时识别的按钮 | |||
video_button = tk.Button( | |||
root, | |||
text="Upload Video File for Processing", | |||
command=lambda: select_and_process_video(canvas, root) | |||
) | |||
video_button.pack(pady=10) | |||
# 上传并处理视频文件的按钮 | |||
root.mainloop() | |||
def switch_to_camera(canvas): | |||
global current_mode, current_cap | |||
stop_current_operation() | |||
# 停止当前操作并释放摄像头 | |||
current_mode = "camera" | |||
canvas.delete("all") | |||
# 设置当前模式为摄像头并清空Canvas | |||
current_cap = cv2.VideoCapture(0) | |||
if not current_cap.isOpened(): | |||
messagebox.showerror("Error", "Cannot open camera") | |||
current_mode = None | |||
return | |||
# 启动摄像头 | |||
start_camera(canvas, current_cap) | |||
# 传入canvas和current_cap | |||
def select_and_process_video(canvas, root): | |||
global current_mode, current_cap | |||
stop_current_operation() | |||
current_mode = "video" | |||
canvas.delete("all") | |||
video_path = filedialog.askopenfilename( | |||
title="Select a Video File", | |||
filetypes=(("MP4 files", "*.mp4"), ("AVI files", "*.avi"), ("All files", "*.*")) | |||
) | |||
# 选择视频文件 | |||
if video_path: | |||
cap = cv2.VideoCapture(video_path) | |||
if not cap.isOpened(): | |||
messagebox.showerror("Error", "Cannot open video file") | |||
return | |||
# 获取视频的宽高并调整 Canvas 大小 | |||
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) | |||
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) | |||
cap.release() | |||
canvas.config(width=frame_width, height=frame_height) | |||
root.geometry(f"{frame_width + 160}x{frame_height + 200}") # 调整窗口大小 | |||
# 获取视频宽高并动态调整canvas的大小 | |||
error_message = upload_and_process_video(canvas, video_path) | |||
if error_message: | |||
messagebox.showerror("Error", error_message) | |||
# 上传并处理视频文件 | |||
def stop_current_operation(): | |||
global current_cap | |||
if current_cap and current_cap.isOpened(): | |||
current_cap.release() | |||
cv2.destroyAllWindows() | |||
current_cap = None | |||
# 停止当前操作 释放摄像头资源并关闭所有窗口 | |||
def start_camera(canvas, cap): | |||
if not cap.isOpened(): | |||
return "Cannot open camera" | |||
gesture_processor = HandGestureProcessor() | |||
show_frame(canvas, cap, gesture_processor) | |||
# 启动摄像头进行实时手势识别 | |||
if __name__ == "__main__": | |||
create_gui() |
@ -0,0 +1,34 @@ | |||
import cv2 | |||
class FingerDrawer: | |||
@staticmethod | |||
def draw_finger_points(image, hand_21, temp_handness, width, height): | |||
cz0 = hand_21.landmark[0].z | |||
index_finger_tip_str = '' | |||
for i in range(21): | |||
cx = int(hand_21.landmark[i].x * width) | |||
cy = int(hand_21.landmark[i].y * height) | |||
cz = hand_21.landmark[i].z | |||
depth_z = cz0 - cz | |||
radius = max(int(6 * (1 + depth_z * 5)), 0) | |||
# 根据深度调整圆点的半径 | |||
if i == 0: | |||
image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1) | |||
elif i == 8: | |||
image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1) | |||
index_finger_tip_str += f'{temp_handness}:{depth_z:.2f}, ' | |||
elif i in [1, 5, 9, 13, 17]: | |||
image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1) | |||
elif i in [2, 6, 10, 14, 18]: | |||
image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1) | |||
elif i in [3, 7, 11, 15, 19]: | |||
image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1) | |||
elif i in [4, 12, 16, 20]: | |||
image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1) | |||
# 根据每组关节绘制不同颜色的圆点 同时根据距离掌根的深度信息进行调整 | |||
return image, index_finger_tip_str |
@ -0,0 +1,43 @@ | |||
from collections import deque | |||
class HandState: | |||
def __init__(self): | |||
self.gesture_locked = {'Left': False, 'Right': False} | |||
self.gesture_start_time = {'Left': 0, 'Right': 0} | |||
self.buffer_start_time = {'Left': 0, 'Right': 0} | |||
self.start_drag_time = {'Left': 0, 'Right': 0} | |||
self.dragging = {'Left': False, 'Right': False} | |||
self.drag_point = {'Left': (0, 0), 'Right': (0, 0)} | |||
self.buffer_duration = {'Left': 0.25, 'Right': 0.25} | |||
self.is_index_finger_up = {'Left': False, 'Right': False} | |||
self.index_finger_second = {'Left': 0, 'Right': 0} | |||
self.index_finger_tip = {'Left': 0, 'Right': 0} | |||
self.trajectory = {'Left': [], 'Right': []} | |||
self.square_queue = deque() | |||
self.wait_time = 1.5 | |||
self.kalman_wait_time = 0.5 | |||
self.wait_box = 2 | |||
self.rect_draw_time = {'Left': 0, 'Right': 0} | |||
self.last_drawn_box = {'Left': None, 'Right': None} | |||
def clear_hand_states(self, detected_hand='Both'): | |||
hands_to_clear = {'Left', 'Right'} | |||
if detected_hand == 'Both': | |||
hands_to_clear = hands_to_clear | |||
else: | |||
hands_to_clear -= {detected_hand} | |||
for h in hands_to_clear: | |||
self.gesture_locked[h] = False | |||
self.gesture_start_time[h] = 0 | |||
self.buffer_start_time[h] = 0 | |||
self.dragging[h] = False | |||
self.drag_point[h] = (0, 0) | |||
self.buffer_duration[h] = 0.25 | |||
self.is_index_finger_up[h] = False | |||
self.trajectory[h].clear() | |||
self.start_drag_time[h] = 0 | |||
self.rect_draw_time[h] = 0 | |||
self.last_drawn_box[h] = None | |||
# 用于记录左右手的信息 需要分开存放 否则可能会出现数据冲突 |
@ -0,0 +1,24 @@ | |||
import cv2 | |||
import time | |||
from hand_gesture import HandGestureHandler | |||
class HandGestureProcessor: | |||
def __init__(self): | |||
self.hand_handler = HandGestureHandler() | |||
def process_image(self, image): | |||
start_time = time.time() | |||
height, width = image.shape[:2] | |||
image = cv2.flip(image, 1) | |||
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) | |||
# 获取图像尺寸 翻转并转换颜色空间 | |||
image = self.hand_handler.handle_hand_gestures(image, width, height) | |||
spend_time = time.time() - start_time | |||
FPS = 1.0 / spend_time if spend_time > 0 else 0 | |||
image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2) | |||
# 计算并显示帧率 | |||
return image |
@ -0,0 +1,403 @@ | |||
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"id": "initial_id", | |||
"metadata": { | |||
"collapsed": true, | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:11:28.761076Z", | |||
"start_time": "2024-09-07T05:11:22.404354Z" | |||
} | |||
}, | |||
"source": [ | |||
"import cv2\n", | |||
"import time\n", | |||
"import mediapipe\n", | |||
"import numpy as np\n", | |||
"from collections import deque\n", | |||
"from filterpy.kalman import KalmanFilter" | |||
], | |||
"outputs": [], | |||
"execution_count": 1 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:11:28.777139Z", | |||
"start_time": "2024-09-07T05:11:28.761076Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"gesture_locked = {'Left':False,'Right':False}\n", | |||
"gesture_start_time = {'Left':0,'Right':0}\n", | |||
"buffer_start_time = {'Left':0,'Right':0}\n", | |||
"start_drag_time = {'Left':0,'Right':0}\n", | |||
"dragging = {'Left':False,'Right':False}\n", | |||
"drag_point = {'Left':(0, 0),'Right':(0, 0)}\n", | |||
"buffer_duration = {'Left':0.25,'Right':0.25}\n", | |||
"is_index_finger_up = {'Left':False,'Right':False}\n", | |||
"index_finger_second = {'Left':0,'Right':0}\n", | |||
"index_finger_tip = {'Left':0,'Right':0}\n", | |||
"trajectory = {'Left':[],'Right':[]}\n", | |||
"square_queue = deque()\n", | |||
"wait_time = 1.5\n", | |||
"kalman_wait_time = 0.5\n", | |||
"wait_box = 2\n", | |||
"rect_draw_time = {'Left':0,'Right':0}\n", | |||
"last_drawn_box = {'Left':None,'Right':None}\n", | |||
"elapsed_time = {'Left':0,'Right':0}" | |||
], | |||
"id": "40aada17ccd31fe", | |||
"outputs": [], | |||
"execution_count": 2 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:11:28.824573Z", | |||
"start_time": "2024-09-07T05:11:28.777139Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"def clear_hand_states(detected_hand ='Both'):\n", | |||
" global gesture_locked, gesture_start_time, buffer_start_time, dragging, drag_point, buffer_duration,is_index_finger_up, trajectory,wait_time,kalman_wait_time, start_drag_time, rect_draw_time, last_drawn_box, wait_box, elapsed_time\n", | |||
" \n", | |||
" hands_to_clear = {'Left', 'Right'}\n", | |||
" if detected_hand == 'Both':\n", | |||
" hands_to_clear = hands_to_clear\n", | |||
" else:\n", | |||
" hands_to_clear -= {detected_hand}\n", | |||
" # 反向判断左右手\n", | |||
"\n", | |||
" for h in hands_to_clear:\n", | |||
" gesture_locked[h] = False\n", | |||
" gesture_start_time[h] = 0\n", | |||
" buffer_start_time[h] = 0\n", | |||
" dragging[h] = False\n", | |||
" drag_point[h] = (0, 0)\n", | |||
" buffer_duration[h] = 0.25\n", | |||
" is_index_finger_up[h] = False\n", | |||
" trajectory[h].clear()\n", | |||
" start_drag_time[h] = 0\n", | |||
" rect_draw_time[h] = 0\n", | |||
" last_drawn_box[h] = None\n", | |||
" elapsed_time[h] = 0\n", | |||
" # 清空没被检测的手" | |||
], | |||
"id": "2ee9323bb1c25cc0", | |||
"outputs": [], | |||
"execution_count": 3 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:11:28.855831Z", | |||
"start_time": "2024-09-07T05:11:28.824573Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"kalman_filters = {\n", | |||
" 'Left': KalmanFilter(dim_x=4, dim_z=2),\n", | |||
" 'Right': KalmanFilter(dim_x=4, dim_z=2)\n", | |||
"}\n", | |||
"\n", | |||
"for key in kalman_filters:\n", | |||
" kalman_filters[key].x = np.array([0., 0., 0., 0.])\n", | |||
" kalman_filters[key].F = np.array([[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]])\n", | |||
" # 状态转移矩阵\n", | |||
" kalman_filters[key].H = np.array([[1, 0, 0, 0], [0, 1, 0, 0]])\n", | |||
" # 观测矩阵\n", | |||
" kalman_filters[key].P *= 1000.\n", | |||
" kalman_filters[key].R = 3\n", | |||
" kalman_filters[key].Q = np.eye(4) * 0.01\n", | |||
"\n", | |||
"def kalman_filter_point(hand_label, x, y):\n", | |||
" kf = kalman_filters[hand_label]\n", | |||
" kf.predict()\n", | |||
" kf.update([x, y])\n", | |||
" # 更新状态\n", | |||
" return (kf.x[0], kf.x[1])\n", | |||
"\n", | |||
"def reset_kalman_filter(hand_label, x, y):\n", | |||
" kf = kalman_filters[hand_label]\n", | |||
" kf.x = np.array([x, y, 0., 0.])\n", | |||
" kf.P *= 1000.\n", | |||
" # 重置" | |||
], | |||
"id": "96cf431d2562e7d", | |||
"outputs": [], | |||
"execution_count": 4 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:11:28.887346Z", | |||
"start_time": "2024-09-07T05:11:28.855831Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"\n", | |||
"mp_hands = mediapipe.solutions.hands\n", | |||
"\n", | |||
"hands = mp_hands.Hands(\n", | |||
" static_image_mode=False,\n", | |||
" max_num_hands=2,\n", | |||
" # 一只更稳定\n", | |||
" min_detection_confidence=0.5,\n", | |||
" min_tracking_confidence=0.5\n", | |||
")\n", | |||
"\n", | |||
"mp_drawing = mediapipe.solutions.drawing_utils\n", | |||
"clear_hand_states()" | |||
], | |||
"id": "edc274b7ed495122", | |||
"outputs": [], | |||
"execution_count": 5 | |||
}, | |||
{ | |||
"metadata": { | |||
"ExecuteTime": { | |||
"end_time": "2024-09-07T05:11:28.934274Z", | |||
"start_time": "2024-09-07T05:11:28.887346Z" | |||
} | |||
}, | |||
"cell_type": "code", | |||
"source": [ | |||
"def process_image(image):\n", | |||
"\n", | |||
" start_time = time.time()\n", | |||
" height, width = image.shape[:2]\n", | |||
" image = cv2.flip(image, 1)\n", | |||
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", | |||
" # 预处理帧\n", | |||
" \n", | |||
" results = hands.process(image)\n", | |||
" \n", | |||
" if results.multi_hand_landmarks:\n", | |||
" # 如果检测到手\n", | |||
" \n", | |||
" handness_str = ''\n", | |||
" index_finger_tip_str = ''\n", | |||
" \n", | |||
" if len(results.multi_hand_landmarks) == 1:\n", | |||
" clear_hand_states(detected_hand = results.multi_handedness[0].classification[0].label)\n", | |||
" # 如果只有一只手 则清空另一只手的数据 避免后续冲突导致不稳定\n", | |||
" \n", | |||
" for hand_idx in range(len(results.multi_hand_landmarks)):\n", | |||
" \n", | |||
" hand_21 = results.multi_hand_landmarks[hand_idx]\n", | |||
" mp_drawing.draw_landmarks(image, hand_21, mp_hands.HAND_CONNECTIONS)\n", | |||
" \n", | |||
" temp_handness = results.multi_handedness[hand_idx].classification[0].label\n", | |||
" handness_str += '{}:{}, '.format(hand_idx, temp_handness)\n", | |||
" is_index_finger_up[temp_handness] = False\n", | |||
" # 先设置为false 防止放下被错误更新为竖起\n", | |||
" \n", | |||
" cz0 = hand_21.landmark[0].z\n", | |||
" index_finger_second[temp_handness] = hand_21.landmark[7]\n", | |||
" index_finger_tip[temp_handness] = hand_21.landmark[8]\n", | |||
" # 食指指尖和第一个关节\n", | |||
" \n", | |||
" index_x, index_y = int(index_finger_tip[temp_handness].x * width), int(index_finger_tip[temp_handness].y * height)\n", | |||
"\n", | |||
" if all(index_finger_second[temp_handness].y < hand_21.landmark[i].y for i in range(21) if i not in [7, 8]) and index_finger_tip[temp_handness].y < index_finger_second[temp_handness].y:\n", | |||
" is_index_finger_up[temp_handness] = True\n", | |||
" # 如果指尖和第二个关节高度大于整只手所有关节点 则视为执行“指向”操作 \n", | |||
"\n", | |||
" if is_index_finger_up[temp_handness]:\n", | |||
" if not gesture_locked[temp_handness]:\n", | |||
" if gesture_start_time[temp_handness] == 0:\n", | |||
" gesture_start_time[temp_handness] = time.time()\n", | |||
" # 记录食指抬起的时间\n", | |||
" elif time.time() - gesture_start_time[temp_handness] > wait_time:\n", | |||
" dragging[temp_handness] = True\n", | |||
" gesture_locked[temp_handness] = True\n", | |||
" drag_point[temp_handness] = (index_x, index_y)\n", | |||
" # 如果食指抬起的时间大于预设的等待时间则视为执行“指向”操作\n", | |||
" buffer_start_time[temp_handness] = 0\n", | |||
" # 检测到食指竖起就刷新缓冲时间\n", | |||
" else:\n", | |||
" if buffer_start_time[temp_handness] == 0:\n", | |||
" buffer_start_time[temp_handness] = time.time()\n", | |||
" elif time.time() - buffer_start_time[temp_handness] > buffer_duration[temp_handness]:\n", | |||
" gesture_start_time[temp_handness] = 0\n", | |||
" gesture_locked[temp_handness] = False\n", | |||
" dragging[temp_handness] = False\n", | |||
" # 如果缓冲时间大于设定 就证明已经结束指向操作\n", | |||
" # 这样可以防止某一帧识别有误导致指向操作被错误清除\n", | |||
" \n", | |||
" if dragging[temp_handness]:\n", | |||
"\n", | |||
" if start_drag_time[temp_handness] == 0:\n", | |||
" start_drag_time[temp_handness] = time.time()\n", | |||
" reset_kalman_filter(temp_handness, index_x, index_y)\n", | |||
" # 每次画线的时候初始化滤波器\n", | |||
" \n", | |||
" smooth_x, smooth_y = kalman_filter_point(temp_handness, index_x, index_y)\n", | |||
" drag_point[temp_handness] = (index_x, index_y)\n", | |||
" index_finger_radius = max(int(10 * (1 + (cz0 - index_finger_tip[temp_handness].z) * 5)), 0)\n", | |||
" cv2.circle(image, drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)\n", | |||
" # 根据离掌根的深度距离来构建一个圆\n", | |||
" # 用来显示已经开始指向操作\n", | |||
" # 和下方构建的深度点位对应 直接用倍数\n", | |||
" drag_point_smooth = (smooth_x, smooth_y)\n", | |||
" \n", | |||
" if time.time() - start_drag_time[temp_handness] > kalman_wait_time:\n", | |||
" trajectory[temp_handness].append(drag_point_smooth)\n", | |||
" # 因为kalman滤波器初始化的时候会很不稳定 前几帧通常会有较为严重的噪声\n", | |||
" # 所以直接等待前几帧运行完成之后再将点位加到轨迹列表中\n", | |||
" else:\n", | |||
" if len(trajectory[temp_handness]) > 4:\n", | |||
" contour = np.array(trajectory[temp_handness], dtype=np.int32)\n", | |||
" rect = cv2.minAreaRect(contour)\n", | |||
" box = cv2.boxPoints(rect)\n", | |||
" box = np.int0(box)\n", | |||
" rect_draw_time[temp_handness] = time.time()\n", | |||
" last_drawn_box[temp_handness] = box\n", | |||
" # 如果指向操作结束 轨迹列表有至少四个点的时候\n", | |||
" # 使用最小包围图形将画的不规则图案调整为一个矩形\n", | |||
"\n", | |||
" start_drag_time[temp_handness] = 0\n", | |||
" trajectory[temp_handness].clear()\n", | |||
"\n", | |||
" for i in range(1, len(trajectory[temp_handness])):\n", | |||
"\n", | |||
" pt1 = (int(trajectory[temp_handness][i-1][0]), int(trajectory[temp_handness][i-1][1]))\n", | |||
" pt2 = (int(trajectory[temp_handness][i][0]), int(trajectory[temp_handness][i][1]))\n", | |||
" cv2.line(image, pt1, pt2, (0, 0, 255), 2)\n", | |||
" # 绘制连接轨迹点的线\n", | |||
"\n", | |||
" if last_drawn_box[temp_handness] is not None:\n", | |||
" elapsed_time[temp_handness] = time.time() - rect_draw_time[temp_handness]\n", | |||
" \n", | |||
" if elapsed_time[temp_handness] < wait_box:\n", | |||
" cv2.drawContours(image, [last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)\n", | |||
" # 将矩形框保留一段时间 否则一帧太快 无法看清效果\n", | |||
" \n", | |||
" elif elapsed_time[temp_handness] >= wait_box - 0.1:\n", | |||
" \n", | |||
" box = last_drawn_box[temp_handness]\n", | |||
" x_min = max(0, min(box[:, 0]))\n", | |||
" y_min = max(0, min(box[:, 1]))\n", | |||
" x_max = min(image.shape[1], max(box[:, 0]))\n", | |||
" y_max = min(image.shape[0], max(box[:, 1]))\n", | |||
" cropped_image = image[y_min:y_max, x_min:x_max]\n", | |||
" filename = f\"../image/cropped_{temp_handness}_{int(time.time())}.jpg\"\n", | |||
" cv2.imwrite(filename, cropped_image)\n", | |||
" last_drawn_box[temp_handness] = None\n", | |||
" # 不能直接剪裁画完的图像 可能会错误的将手剪裁进去\n", | |||
" # 等待一段时间 有一个给手缓冲移动走的时间再将这一帧里的矩形提取出来\n", | |||
" \n", | |||
" for i in range(21):\n", | |||
" \n", | |||
" cx = int(hand_21.landmark[i].x * width)\n", | |||
" cy = int(hand_21.landmark[i].y * height)\n", | |||
" cz = hand_21.landmark[i].z\n", | |||
" depth_z = cz0 - cz\n", | |||
" radius = max(int(6 * (1 + depth_z*5)), 0)\n", | |||
" \n", | |||
" if i == 0:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)\n", | |||
" if i == 8:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)\n", | |||
" index_finger_tip_str += '{}:{:.2f}, '.format(hand_idx, depth_z)\n", | |||
" if i in [1,5,9,13,17]: \n", | |||
" image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)\n", | |||
" if i in [2,6,10,14,18]:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)\n", | |||
" if i in [3,7,11,15,19]:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)\n", | |||
" if i in [4,12,16,20]:\n", | |||
" image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)\n", | |||
" # 提取出每一个关节点 赋予对应的颜色和根据掌根的深度\n", | |||
" \n", | |||
" scaler= 1\n", | |||
" image = cv2.putText(image,handness_str, (25*scaler, 100*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)\n", | |||
" image = cv2.putText(image,index_finger_tip_str, (25*scaler, 150*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)\n", | |||
"\n", | |||
" spend_time = time.time() - start_time\n", | |||
" if spend_time > 0:\n", | |||
" FPS = 1.0 / spend_time\n", | |||
" else:\n", | |||
" FPS = 0\n", | |||
" \n", | |||
" image = cv2.putText(image,'FPS '+str(int(FPS)),(25*scaler,50*scaler),cv2.FONT_HERSHEY_SIMPLEX,1.25*scaler,(0,0,255),2,)\n", | |||
" # 显示FPS 检测到的手和食指指尖对于掌根的深度值\n", | |||
" \n", | |||
" else:\n", | |||
" clear_hand_states()\n", | |||
" # 如果没检测到手就清空全部信息\n", | |||
" \n", | |||
" return image" | |||
],
"id": "51ff809ecaf1f899",
"outputs": [],
"execution_count": 6
},
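{
"metadata": {},
"cell_type": "code",
"source": [
"# A minimal single-frame sketch: 'sample.jpg' is a placeholder test image,\n",
"# not a file shipped with the project.\n",
"test_frame = cv2.imread('sample.jpg')\n",
"if test_frame is not None:\n",
"    cv2.imshow('single frame', process_image(test_frame))\n",
"    cv2.waitKey(0)\n",
"    cv2.destroyAllWindows()"
],
"id": "single-frame-demo",
"outputs": [],
"execution_count": null
},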
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:19:32.248575Z",
"start_time": "2024-09-07T05:11:28.934663Z"
}
},
"cell_type": "code",
"source": [ | |||
"cap = cv2.VideoCapture(1)\n", | |||
"cap.open(0)\n", | |||
"\n", | |||
"while cap.isOpened():\n", | |||
" success, frame = cap.read()\n", | |||
" if not success:\n", | |||
" print(\"Camera Error\")\n", | |||
" break\n", | |||
" \n", | |||
" frame = process_image(frame)\n", | |||
" cv2.imshow('Video', frame)\n", | |||
" \n", | |||
" if cv2.waitKey(1) & 0xFF == ord('q'):\n", | |||
" break\n", | |||
" \n", | |||
"cap.release()\n", | |||
"cv2.destroyAllWindows() " | |||
], | |||
"id": "b7ce23e80ed36041", | |||
"outputs": [], | |||
"execution_count": 7 | |||
}, | |||
{ | |||
"metadata": {}, | |||
"cell_type": "code", | |||
"outputs": [], | |||
"execution_count": null, | |||
"source": "", | |||
"id": "10fca4bc34a944ea" | |||
} | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 2 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython2", | |||
"version": "2.7.6" | |||
} | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 5 | |||
} |
@ -0,0 +1,56 @@
# hand_gesture.py
import cv2
from model import HandTracker
from index_finger import IndexFingerHandler
from gesture_data import HandState
from kalman_filter import KalmanHandler
from utils_zh.finger_drawer import FingerDrawer


class HandGestureHandler:
    def __init__(self):
        self.hand_state = HandState()
        self.kalman_handler = KalmanHandler()
        self.hand_tracker = HandTracker()
        self.index_handler = IndexFingerHandler(self.hand_state, self.kalman_handler)

    def handle_hand_gestures(self, image, width, height, is_video):
        results = self.hand_tracker.process(image)

        if results.multi_hand_landmarks:
            handness_str = ''
            index_finger_tip_str = ''

            if len(results.multi_hand_landmarks) == 1:
                detected_hand = results.multi_handedness[0].classification[0].label
                self.hand_state.clear_hand_states(detected_hand)
                # If only one hand is detected, clear the other hand's state so its
                # stale data cannot clash once a second hand appears

            for hand_idx, hand_21 in enumerate(results.multi_hand_landmarks):
                self.hand_tracker.mp_drawing.draw_landmarks(
                    image, hand_21, self.hand_tracker.mp_hands.HAND_CONNECTIONS
                )
                # Draw the hand landmarks and their connections

                temp_handness = results.multi_handedness[hand_idx].classification[0].label
                handness_str += f'{hand_idx}:{temp_handness}, '
                self.hand_state.is_index_finger_up[temp_handness] = False

                image = self.index_handler.handle_index_finger(
                    image, hand_21, temp_handness, width, height
                )
                # Handle the index-finger pointing gesture

                image, index_finger_tip_str = FingerDrawer.draw_finger_points(image, hand_21, temp_handness, width, height)

            if is_video:
                image = cv2.flip(image, 1)
            image = cv2.putText(image, handness_str, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
            image = cv2.putText(image, index_finger_tip_str, (25, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
        else:
            if is_video:
                image = cv2.flip(image, 1)
            # Video from a rear camera is flipped before detection so left/right
            # handedness is classified correctly, then flipped back afterwards so
            # the final output is not mirrored.
            self.hand_state.clear_hand_states()
            # If no hand is detected, clear all hand state

        return image
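
# A minimal single-frame sketch, assuming a default webcam at index 0; the
# mirror/RGB preprocessing mimics what process_images.py does before calling
# this handler.
if __name__ == "__main__":
    cap = cv2.VideoCapture(0)
    success, frame = cap.read()
    cap.release()
    if success:
        handler = HandGestureHandler()
        height, width = frame.shape[:2]
        rgb = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
        annotated = handler.handle_hand_gestures(rgb, width, height, False)
        print("processed one frame:", annotated.shape)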
@ -0,0 +1,112 @@
# index_finger.py
import cv2
import time
import numpy as np


class IndexFingerHandler:
    def __init__(self, hand_state, kalman_handler):
        self.hand_state = hand_state
        self.kalman_handler = kalman_handler
        self.wait_time = 1.5
        self.kalman_wait_time = 0.5
        self.wait_box = 2

    def handle_index_finger(self, image, hand_21, temp_handness, width, height):
        cz0 = hand_21.landmark[0].z
        self.hand_state.index_finger_second[temp_handness] = hand_21.landmark[7]
        self.hand_state.index_finger_tip[temp_handness] = hand_21.landmark[8]
        index_x = int(self.hand_state.index_finger_tip[temp_handness].x * width)
        index_y = int(self.hand_state.index_finger_tip[temp_handness].y * height)
        self.update_index_finger_state(hand_21, temp_handness, index_x, index_y)
        self.draw_index_finger_gesture(image, temp_handness, index_x, index_y, cz0)
        return image
        # Handles the index finger's state and gesture effects, and updates the image

    def update_index_finger_state(self, hand_21, temp_handness, index_x, index_y):
        if all(self.hand_state.index_finger_second[temp_handness].y < hand_21.landmark[i].y
               for i in range(21) if i not in [7, 8]) and \
                self.hand_state.index_finger_tip[temp_handness].y < self.hand_state.index_finger_second[temp_handness].y:
            self.hand_state.is_index_finger_up[temp_handness] = True
            # If the index fingertip and its second joint sit above every other
            # landmark (smaller y in image coordinates), mark the finger as raised

        if self.hand_state.is_index_finger_up[temp_handness]:
            if not self.hand_state.gesture_locked[temp_handness]:
                if self.hand_state.gesture_start_time[temp_handness] == 0:
                    self.hand_state.gesture_start_time[temp_handness] = time.time()
                elif time.time() - self.hand_state.gesture_start_time[temp_handness] > self.wait_time:
                    self.hand_state.dragging[temp_handness] = True
                    self.hand_state.gesture_locked[temp_handness] = True
                    self.hand_state.drag_point[temp_handness] = (index_x, index_y)
                    # Once the pointing pose has been held past the wait time,
                    # lock it in as an active pointing operation
            self.hand_state.buffer_start_time[temp_handness] = 0
            # Reset the buffer timer that keeps a brief mis-detection from
            # instantly cancelling the pointing operation
        else:
            if self.hand_state.buffer_start_time[temp_handness] == 0:
                self.hand_state.buffer_start_time[temp_handness] = time.time()
            elif time.time() - self.hand_state.buffer_start_time[temp_handness] > self.hand_state.buffer_duration[temp_handness]:
                self.hand_state.gesture_start_time[temp_handness] = 0
                self.hand_state.gesture_locked[temp_handness] = False
                self.hand_state.dragging[temp_handness] = False
                # If the interruption outlasts the buffer window, end the
                # pointing operation for real

    def draw_index_finger_gesture(self, image, temp_handness, index_x, index_y, cz0):
        if self.hand_state.dragging[temp_handness]:
            if self.hand_state.start_drag_time[temp_handness] == 0:
                self.hand_state.start_drag_time[temp_handness] = time.time()
                self.kalman_handler.reset_kalman_filter(temp_handness, index_x, index_y)
                # On the first frame of a drag, record the time and reset the Kalman filter
            smooth_x, smooth_y = self.kalman_handler.kalman_filter_point(temp_handness, index_x, index_y)
            # Smooth the generated trajectory with the Kalman filter to cut noise and jitter
            self.hand_state.drag_point[temp_handness] = (index_x, index_y)
            index_finger_radius = max(int(10 * (1 + (cz0 - self.hand_state.index_finger_tip[temp_handness].z) * 5)), 0)
            cv2.circle(image, self.hand_state.drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)
            # Scale the circle with the fingertip's distance from the wrist, slightly larger
            # than FingerDrawer's version, so it is easy to see that the pointing operation is locked
            drag_point_smooth = (smooth_x, smooth_y)
            if time.time() - self.hand_state.start_drag_time[temp_handness] > self.kalman_wait_time:
                self.hand_state.trajectory[temp_handness].append(drag_point_smooth)
                # The filter needs time to stabilize after initialization, so wait
                # until it settles before appending points to the trajectory
        else:
            if len(self.hand_state.trajectory[temp_handness]) > 4:
                contour = np.array(self.hand_state.trajectory[temp_handness], dtype=np.int32)
                rect = cv2.minAreaRect(contour)
                box = cv2.boxPoints(rect)
                box = np.intp(box)
                # With more than four dragged points, compute the minimum-area bounding rectangle
                self.hand_state.rect_draw_time[temp_handness] = time.time()
                self.hand_state.last_drawn_box[temp_handness] = box
            self.hand_state.start_drag_time[temp_handness] = 0
            self.hand_state.trajectory[temp_handness].clear()
            # Reset and clear

        for i in range(1, len(self.hand_state.trajectory[temp_handness])):
            pt1 = (int(self.hand_state.trajectory[temp_handness][i-1][0]), int(self.hand_state.trajectory[temp_handness][i-1][1]))
            pt2 = (int(self.hand_state.trajectory[temp_handness][i][0]), int(self.hand_state.trajectory[temp_handness][i][1]))
            cv2.line(image, pt1, pt2, (0, 0, 255), 2)
            # Draw the drag path

        if self.hand_state.last_drawn_box[temp_handness] is not None:
            elapsed_time = time.time() - self.hand_state.rect_draw_time[temp_handness]
            if elapsed_time < self.wait_box:
                cv2.drawContours(image, [self.hand_state.last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)
                # Keep the bounding box visible for a while so it can actually be observed
            elif elapsed_time >= self.wait_box - 0.1:
                box = self.hand_state.last_drawn_box[temp_handness]
                x_min = max(0, min(box[:, 0]))
                y_min = max(0, min(box[:, 1]))
                x_max = min(image.shape[1], max(box[:, 0]))
                y_max = min(image.shape[0], max(box[:, 1]))
                cropped_image = image[y_min:y_max, x_min:x_max]
                filename = f"../image/cropped_{temp_handness}_{int(time.time())}.jpg"
                cv2.imwrite(filename, cropped_image)
                self.hand_state.last_drawn_box[temp_handness] = None
                # Cropping right after the box is drawn could catch the hand in the crop,
                # so the crop happens as the box expires, giving the hand time to move away
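
# A standalone sketch of the trajectory-to-rectangle step above, using made-up
# fingertip coordinates in place of real smoothed drag points.
if __name__ == "__main__":
    canvas = np.zeros((480, 640, 3), dtype=np.uint8)
    trajectory = [(100, 120), (180, 90), (260, 140), (230, 220), (130, 210)]
    contour = np.array(trajectory, dtype=np.int32)
    # minAreaRect fits the smallest rotated rectangle around the drawn shape
    box = np.intp(cv2.boxPoints(cv2.minAreaRect(contour)))
    cv2.drawContours(canvas, [box], 0, (0, 255, 0), 2)
    x_min, y_min = box[:, 0].min(), box[:, 1].min()
    x_max, y_max = box[:, 0].max(), box[:, 1].max()
    print("crop region:", x_min, y_min, x_max, y_max)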
@ -0,0 +1,36 @@
# kalman_filter.py
import numpy as np
from filterpy.kalman import KalmanFilter


class KalmanHandler:
    def __init__(self):
        self.kalman_filters = {
            'Left': KalmanFilter(dim_x=4, dim_z=2),
            'Right': KalmanFilter(dim_x=4, dim_z=2)
        }
        for key in self.kalman_filters:
            # State vector [x, y, vx, vy] with a constant-velocity transition model;
            # only the position (x, y) is measured
            self.kalman_filters[key].x = np.array([0., 0., 0., 0.])
            self.kalman_filters[key].F = np.array([[1, 0, 1, 0],
                                                   [0, 1, 0, 1],
                                                   [0, 0, 1, 0],
                                                   [0, 0, 0, 1]])
            self.kalman_filters[key].H = np.array([[1, 0, 0, 0],
                                                   [0, 1, 0, 0]])
            self.kalman_filters[key].P *= 1000.
            self.kalman_filters[key].R = 3
            self.kalman_filters[key].Q = np.eye(4) * 0.01
            # These parameters came out of repeated testing and behave fairly stably

    def kalman_filter_point(self, hand_label, x, y):
        kf = self.kalman_filters[hand_label]
        kf.predict()
        kf.update([x, y])
        # Predict, then correct with the measured fingertip position
        return (kf.x[0], kf.x[1])

    def reset_kalman_filter(self, hand_label, x, y):
        kf = self.kalman_filters[hand_label]
        kf.x = np.array([x, y, 0., 0.])
        kf.P *= 1000.
        # Re-seed the state at the new point and inflate the covariance
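
# A minimal smoothing sketch with synthetic noisy points; the trajectory and
# jitter level are illustrative assumptions, not values from the project.
if __name__ == "__main__":
    import random

    handler = KalmanHandler()
    handler.reset_kalman_filter('Right', 100, 100)
    for t in range(20):
        # A point moving diagonally with +/-3 px of jitter
        noisy_x = 100 + 4 * t + random.uniform(-3, 3)
        noisy_y = 100 + 4 * t + random.uniform(-3, 3)
        sx, sy = handler.kalman_filter_point('Right', noisy_x, noisy_y)
        print(f"raw=({noisy_x:6.1f}, {noisy_y:6.1f})  smoothed=({sx:6.1f}, {sy:6.1f})")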
@ -0,0 +1,17 @@
# model.py
import mediapipe as mp


class HandTracker:
    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            # Tracking a single hand is more stable
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
        self.mp_drawing = mp.solutions.drawing_utils

    def process(self, image):
        results = self.hands.process(image)
        return results
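
# A minimal sketch, assuming a placeholder test image 'sample.jpg'; MediaPipe
# expects RGB input, so the BGR frame is converted before processing.
if __name__ == "__main__":
    import cv2

    tracker = HandTracker()
    frame = cv2.imread("sample.jpg")  # placeholder path, not a project file
    if frame is not None:
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = tracker.process(rgb)
        print("hands detected:", bool(results.multi_hand_landmarks))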
@ -0,0 +1,24 @@
# process_images.py
import cv2
import time
from hand_gesture import HandGestureHandler


class HandGestureProcessor:
    def __init__(self):
        self.hand_handler = HandGestureHandler()

    def process_image(self, image, is_video):
        start_time = time.time()
        height, width = image.shape[:2]
        image = cv2.flip(image, 1)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Preprocess the incoming frame: mirror it and convert BGR to RGB
        image = self.hand_handler.handle_hand_gestures(image, width, height, is_video)
        spend_time = time.time() - start_time
        FPS = 1.0 / spend_time if spend_time > 0 else 0
        image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
        # Compute and overlay the frame rate
        return image
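
# A minimal sketch, assuming a default webcam at index 0; process_image converts
# the frame to RGB internally, so it is converted back to BGR for imshow.
if __name__ == "__main__":
    cap = cv2.VideoCapture(0)
    success, frame = cap.read()
    cap.release()
    if success:
        processor = HandGestureProcessor()
        annotated = processor.process_image(frame, False)
        cv2.imshow('sample', cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
        cv2.waitKey(0)
        cv2.destroyAllWindows()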
@ -0,0 +1,65 @@
# video_recognition.py
import cv2
from process_images import HandGestureProcessor
from tkinter import messagebox
from PIL import Image, ImageTk


def start_camera(canvas, cap):
    if not cap.isOpened():
        return "Cannot open camera"
    gesture_processor = HandGestureProcessor()
    show_frame(canvas, cap, gesture_processor)


def show_frame(canvas, cap, gesture_processor):
    success, frame = cap.read()
    if success:
        processed_frame = gesture_processor.process_image(frame, False)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        # Process the frame, convert it for PIL, and draw it on the canvas
        canvas.after(10, show_frame, canvas, cap, gesture_processor)
        # Schedule this function again so every following frame is processed and shown
    else:
        cap.release()
        cv2.destroyAllWindows()


def upload_and_process_video(canvas, video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Cannot open video file"
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Read the video's parameters
    output_filename = "../video/processed_output.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))
    # Set the output file path and a codec that matches the .mp4 container
    gesture_processor = HandGestureProcessor()
    process_video_frame(canvas, cap, gesture_processor, out)


def process_video_frame(canvas, cap, gesture_processor, out):
    success, frame = cap.read()
    if success:
        processed_frame = gesture_processor.process_image(frame, True)
        out.write(processed_frame)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        canvas.after(10, process_video_frame, canvas, cap, gesture_processor, out)
    else:
        cap.release()
        out.release()
        cv2.destroyAllWindows()
        messagebox.showinfo("Info", "Processed video saved as processed_output.mp4")
        print("Processed video saved as processed_output.mp4")