
fix_bugs_in_screenshot_pro

screenshot · ydw committed 2 months ago · commit 4422837d1a
26 changed files with 129 additions and 353 deletions
 1. +5  -0    .vscode/settings.json
 2. BIN       image/cropped_Right_1753179393.jpg
 3. BIN       image/cropped_Right_1753179532.jpg
 4. BIN       image/cropped_Right_1753179605.jpg
 5. +0  -0    main.py
 6. +44 -29   utils/GUI.py
 7. BIN       utils/__pycache__/finger_drawer.cpython-312.pyc
 8. BIN       utils/__pycache__/finger_drawer.cpython-38.pyc
 9. BIN       utils/__pycache__/gesture_data.cpython-312.pyc
10. BIN       utils/__pycache__/gesture_data.cpython-38.pyc
11. BIN       utils/__pycache__/hand_gesture.cpython-312.pyc
12. BIN       utils/__pycache__/hand_gesture.cpython-38.pyc
13. BIN       utils/__pycache__/index_finger.cpython-312.pyc
14. BIN       utils/__pycache__/index_finger.cpython-38.pyc
15. BIN       utils/__pycache__/kalman_filter.cpython-312.pyc
16. BIN       utils/__pycache__/kalman_filter.cpython-38.pyc
17. BIN       utils/__pycache__/model.cpython-312.pyc
18. BIN       utils/__pycache__/model.cpython-38.pyc
19. BIN       utils/__pycache__/process_images.cpython-312.pyc
20. BIN       utils/__pycache__/process_images.cpython-38.pyc
21. BIN       utils/__pycache__/video_recognition.cpython-312.pyc
22. BIN       utils/__pycache__/video_recognition.cpython-38.pyc
23. +78 -44   utils/gesture_recognition.ipynb
24. +1  -1    utils/hand_gesture.py
25. +1  -1    utils/index_finger.py
26. +0  -278  utils/main.py

+5 -0  .vscode/settings.json

@@ -0,0 +1,5 @@
{
    "python-envs.defaultEnvManager": "ms-python.python:conda",
    "python-envs.defaultPackageManager": "ms-python.python:conda",
    "python-envs.pythonProjects": []
}

BIN  image/cropped_Right_1753179393.jpg

Width: 275  |  Height: 254  |  Size: 26 KiB

BIN  image/cropped_Right_1753179532.jpg

Width: 268  |  Height: 257  |  Size: 15 KiB

BIN  image/cropped_Right_1753179605.jpg

Width: 585  |  Height: 384  |  Size: 59 KiB

+0 -0  main.py


+44 -29  utils/GUI.py

@@ -9,33 +9,44 @@ current_mode = None
 current_cap = None
 # Used to track the current mode and the camera resource
 # Main logic for initialising the GUI
 def create_gui():
-    root = tk.Tk()
-    root.title("Gesture Recognition")
-    root.geometry("800x600")
-    canvas = tk.Canvas(root, width=640, height=480)
-    canvas.pack()
-    # Canvas that displays the video content
-    camera_button = tk.Button(
-        root,
-        text="Use Camera for Real-time Recognition",
-        command=lambda: switch_to_camera(canvas)
-    )
-    camera_button.pack(pady=10)
-    # Button that starts real-time camera recognition
-    video_button = tk.Button(
-        root,
-        text="Upload Video File for Processing",
-        command=lambda: select_and_process_video(canvas, root)
-    )
-    video_button.pack(pady=10)
-    # Button that uploads and processes a video file
-    root.mainloop()
+    try:
+        print("Starting to create the GUI")
+        root = tk.Tk()
+        root.title("Gesture Recognition")
+        root.geometry("800x600")
+        print("GUI window created successfully")
+        canvas = tk.Canvas(root, width=640, height=480)
+        canvas.pack()
+        print("Canvas created successfully")
+        camera_button = tk.Button(
+            root,
+            text="Use Camera for Real-time Recognition",
+            command=lambda: switch_to_camera(canvas)
+        )
+        camera_button.pack(pady=10)
+        print("Camera button created successfully")
+        video_button = tk.Button(
+            root,
+            text="Upload Video File for Processing",
+            command=lambda: select_and_process_video(canvas, root)
+        )
+        video_button.pack(pady=10)
+        print("Video upload button created successfully")
+        print("GUI created, entering the main loop")
+        root.mainloop()
+    except Exception as e:
+        print(f"[ERROR] Exception while creating the GUI: {str(e)}")
+        import traceback
+        print(traceback.format_exc())
 # Switch to real-time camera recognition mode
 def switch_to_camera(canvas):
     global current_mode, current_cap
@@ -46,18 +57,18 @@ def switch_to_camera(canvas):
     canvas.delete("all")
     # Set the current mode to camera and clear the canvas
-    current_cap = cv2.VideoCapture(0)
+    current_cap = cv2.VideoCapture(1)
+    current_cap.open(0)
     if not current_cap.isOpened():
         messagebox.showerror("Error", "Cannot open camera")
         current_mode = None
         return
     # Start the camera
     start_camera(canvas, current_cap)
     # Pass in canvas and current_cap
 # Switch to video-stream processing mode
 def select_and_process_video(canvas, root):
     global current_mode, current_cap
@@ -108,5 +119,9 @@ def start_camera(canvas, cap):
     show_frame(canvas, cap, gesture_processor)
     # Start the camera for real-time gesture recognition
+if __name__ == "__main__":
+    create_gui()
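
Note on the camera-open change above: constructing `cv2.VideoCapture(1)` and then calling `current_cap.open(0)` ends up on device 0 either way, because OpenCV's documented behaviour for `open()` is to first release any already-opened device. If the intent is to probe several camera indices, an explicit fallback loop states it more clearly. The sketch below is illustrative only; the helper name `open_camera` and the index order are assumptions, not part of this commit:

    import cv2

    def open_camera(preferred_indices=(1, 0)):
        # Try each camera index in turn and return the first capture that opens.
        for idx in preferred_indices:
            cap = cv2.VideoCapture(idx)
            if cap.isOpened():
                return cap
            cap.release()
        return None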

BIN  utils/__pycache__/finger_drawer.cpython-312.pyc
BIN  utils/__pycache__/finger_drawer.cpython-38.pyc
BIN  utils/__pycache__/gesture_data.cpython-312.pyc
BIN  utils/__pycache__/gesture_data.cpython-38.pyc
BIN  utils/__pycache__/hand_gesture.cpython-312.pyc
BIN  utils/__pycache__/hand_gesture.cpython-38.pyc
BIN  utils/__pycache__/index_finger.cpython-312.pyc
BIN  utils/__pycache__/index_finger.cpython-38.pyc
BIN  utils/__pycache__/kalman_filter.cpython-312.pyc
BIN  utils/__pycache__/kalman_filter.cpython-38.pyc
BIN  utils/__pycache__/model.cpython-312.pyc
BIN  utils/__pycache__/model.cpython-38.pyc
BIN  utils/__pycache__/process_images.cpython-312.pyc
BIN  utils/__pycache__/process_images.cpython-38.pyc
BIN  utils/__pycache__/video_recognition.cpython-312.pyc
BIN  utils/__pycache__/video_recognition.cpython-38.pyc

+78 -44  utils/gesture_recognition.ipynb

@@ -2,14 +2,16 @@
  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 1,
    "id": "initial_id",
    "metadata": {
-    "collapsed": true,
     "ExecuteTime": {
      "end_time": "2024-09-07T05:11:28.761076Z",
      "start_time": "2024-09-07T05:11:22.404354Z"
-    }
+    },
+    "collapsed": true
    },
+   "outputs": [],
    "source": [
     "import cv2\n",
     "import time\n",
@@ -17,18 +19,19 @@
     "import numpy as np\n",
     "from collections import deque\n",
     "from filterpy.kalman import KalmanFilter"
-   ],
-   "outputs": [],
-   "execution_count": 1
+   ]
   },
   {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "40aada17ccd31fe",
    "metadata": {
     "ExecuteTime": {
      "end_time": "2024-09-07T05:11:28.777139Z",
      "start_time": "2024-09-07T05:11:28.761076Z"
     }
    },
-   "cell_type": "code",
+   "outputs": [],
    "source": [
     "gesture_locked = {'Left':False,'Right':False}\n",
     "gesture_start_time = {'Left':0,'Right':0}\n",
@@ -48,19 +51,19 @@
     "rect_draw_time = {'Left':0,'Right':0}\n",
     "last_drawn_box = {'Left':None,'Right':None}\n",
     "elapsed_time = {'Left':0,'Right':0}"
-   ],
-   "id": "40aada17ccd31fe",
-   "outputs": [],
-   "execution_count": 2
+   ]
   },
   {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2ee9323bb1c25cc0",
    "metadata": {
     "ExecuteTime": {
      "end_time": "2024-09-07T05:11:28.824573Z",
      "start_time": "2024-09-07T05:11:28.777139Z"
     }
    },
-   "cell_type": "code",
+   "outputs": [],
    "source": [
     "def clear_hand_states(detected_hand ='Both'):\n",
     "    global gesture_locked, gesture_start_time, buffer_start_time, dragging, drag_point, buffer_duration,is_index_finger_up, trajectory,wait_time,kalman_wait_time, start_drag_time, rect_draw_time, last_drawn_box, wait_box, elapsed_time\n",
@@ -86,19 +89,19 @@
     "        last_drawn_box[h] = None\n",
     "        elapsed_time[h] = 0\n",
     "    # Clear state for hands that were not detected"
-   ],
-   "id": "2ee9323bb1c25cc0",
-   "outputs": [],
-   "execution_count": 3
+   ]
   },
   {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "96cf431d2562e7d",
    "metadata": {
     "ExecuteTime": {
      "end_time": "2024-09-07T05:11:28.855831Z",
      "start_time": "2024-09-07T05:11:28.824573Z"
     }
    },
-   "cell_type": "code",
+   "outputs": [],
    "source": [
     "kalman_filters = {\n",
     "    'Left': KalmanFilter(dim_x=4, dim_z=2),\n",
@@ -127,19 +130,19 @@
     "    kf.x = np.array([x, y, 0., 0.])\n",
     "    kf.P *= 1000.\n",
     "    # Reset"
-   ],
-   "id": "96cf431d2562e7d",
-   "outputs": [],
-   "execution_count": 4
+   ]
   },
   {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "edc274b7ed495122",
    "metadata": {
     "ExecuteTime": {
      "end_time": "2024-09-07T05:11:28.887346Z",
      "start_time": "2024-09-07T05:11:28.855831Z"
     }
    },
-   "cell_type": "code",
+   "outputs": [],
    "source": [
     "\n",
     "mp_hands = mediapipe.solutions.hands\n",
@@ -154,19 +157,19 @@
     "\n",
     "mp_drawing = mediapipe.solutions.drawing_utils\n",
     "clear_hand_states()"
-   ],
-   "id": "edc274b7ed495122",
-   "outputs": [],
-   "execution_count": 5
+   ]
   },
   {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "51ff809ecaf1f899",
    "metadata": {
     "ExecuteTime": {
      "end_time": "2024-09-07T05:11:28.934274Z",
      "start_time": "2024-09-07T05:11:28.887346Z"
     }
    },
-   "cell_type": "code",
+   "outputs": [],
    "source": [
     "def process_image(image):\n",
     "\n",
@@ -256,7 +259,7 @@
     "                contour = np.array(trajectory[temp_handness], dtype=np.int32)\n",
     "                rect = cv2.minAreaRect(contour)\n",
     "                box = cv2.boxPoints(rect)\n",
-    "                box = np.int0(box)\n",
+    "                box = np.int64(box)\n",
     "                rect_draw_time[temp_handness] = time.time()\n",
     "                last_drawn_box[temp_handness] = box\n",
     "                # If the pointing action has ended and the trajectory has at least four points\n",
@@ -334,19 +337,53 @@
     "    # If no hand is detected, clear all state\n",
     "    \n",
     "    return image"
-   ],
-   "id": "51ff809ecaf1f899",
-   "outputs": [],
-   "execution_count": 6
+   ]
   },
   {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b7ce23e80ed36041",
    "metadata": {
     "ExecuteTime": {
      "end_time": "2024-09-07T05:19:32.248575Z",
      "start_time": "2024-09-07T05:11:28.934663Z"
     }
    },
-   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\25055\\AppData\\Local\\Temp\\ipykernel_4200\\752492595.py:89: DeprecationWarning: `np.int0` is a deprecated alias for `np.intp`. (Deprecated NumPy 1.24)\n",
+      "  box = np.int0(box)\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[7], line 10\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCamera Error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 8\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m---> 10\u001b[0m frame \u001b[38;5;241m=\u001b[39m \u001b[43mprocess_image\u001b[49m\u001b[43m(\u001b[49m\u001b[43mframe\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 11\u001b[0m cv2\u001b[38;5;241m.\u001b[39mimshow(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVideo\u001b[39m\u001b[38;5;124m'\u001b[39m, frame)\n\u001b[0;32m 13\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cv2\u001b[38;5;241m.\u001b[39mwaitKey(\u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m&\u001b[39m \u001b[38;5;241m0xFF\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mord\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mq\u001b[39m\u001b[38;5;124m'\u001b[39m):\n",
+      "Cell \u001b[1;32mIn[6], line 9\u001b[0m, in \u001b[0;36mprocess_image\u001b[1;34m(image)\u001b[0m\n\u001b[0;32m 6\u001b[0m image \u001b[38;5;241m=\u001b[39m cv2\u001b[38;5;241m.\u001b[39mcvtColor(image, cv2\u001b[38;5;241m.\u001b[39mCOLOR_BGR2RGB)\n\u001b[0;32m 7\u001b[0m \u001b[38;5;66;03m# Preprocess the frame\u001b[39;00m\n\u001b[1;32m----> 9\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mhands\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m results\u001b[38;5;241m.\u001b[39mmulti_hand_landmarks:\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# A hand was detected\u001b[39;00m\n\u001b[0;32m 14\u001b[0m handness_str \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\n",
+      "File \u001b[1;32md:\\app-install-dict\\Anaconda3\\envs\\software_engineering\\lib\\site-packages\\mediapipe\\python\\solutions\\hands.py:153\u001b[0m, in \u001b[0;36mHands.process\u001b[1;34m(self, image)\u001b[0m\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mprocess\u001b[39m(\u001b[38;5;28mself\u001b[39m, image: np\u001b[38;5;241m.\u001b[39mndarray) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NamedTuple:\n\u001b[0;32m 133\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Processes an RGB image and returns the hand landmarks and handedness of each detected hand.\u001b[39;00m\n\u001b[0;32m 134\u001b[0m \n\u001b[0;32m 135\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;124;03m right hand) of the detected hand.\u001b[39;00m\n\u001b[0;32m 151\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 153\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mimage\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mimage\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32md:\\app-install-dict\\Anaconda3\\envs\\software_engineering\\lib\\site-packages\\mediapipe\\python\\solution_base.py:335\u001b[0m, in \u001b[0;36mSolutionBase.process\u001b[1;34m(self, input_data)\u001b[0m\n\u001b[0;32m 329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 330\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_graph\u001b[38;5;241m.\u001b[39madd_packet_to_input_stream(\n\u001b[0;32m 331\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream_name,\n\u001b[0;32m 332\u001b[0m packet\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_packet(input_stream_type,\n\u001b[0;32m 333\u001b[0m data)\u001b[38;5;241m.\u001b[39mat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_simulated_timestamp))\n\u001b[1;32m--> 335\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_graph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait_until_idle\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 336\u001b[0m \u001b[38;5;66;03m# Create a NamedTuple object where the field names are mapping to the graph\u001b[39;00m\n\u001b[0;32m 337\u001b[0m \u001b[38;5;66;03m# output stream names.\u001b[39;00m\n\u001b[0;32m 338\u001b[0m solution_outputs \u001b[38;5;241m=\u001b[39m collections\u001b[38;5;241m.\u001b[39mnamedtuple(\n\u001b[0;32m 339\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSolutionOutputs\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_output_stream_type_info\u001b[38;5;241m.\u001b[39mkeys())\n",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    },
+    {
+     "ename": "",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell.\n",
+      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure.\n",
+      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info.\n",
+      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
+     ]
+    }
+   ],
    "source": [
     "cap = cv2.VideoCapture(1)\n",
     "cap.open(0)\n",
@@ -365,37 +402,34 @@
     "    \n",
     "cap.release()\n",
     "cv2.destroyAllWindows() "
-   ],
-   "id": "b7ce23e80ed36041",
-   "outputs": [],
-   "execution_count": 7
+   ]
   },
   {
-   "metadata": {},
    "cell_type": "code",
-   "outputs": [],
    "execution_count": null,
-   "source": "",
-   "id": "10fca4bc34a944ea"
+   "id": "10fca4bc34a944ea",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
 "metadata": {
  "kernelspec": {
-  "display_name": "Python 3",
+  "display_name": "software_engineering",
   "language": "python",
   "name": "python3"
  },
 "language_info": {
  "codemirror_mode": {
   "name": "ipython",
-  "version": 2
+  "version": 3
  },
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.6"
+ "pygments_lexer": "ipython3",
+ "version": "3.8.20"
 }
},
"nbformat": 4,
+1 -1  utils/hand_gesture.py

@@ -3,7 +3,7 @@ from model import HandTracker
 from index_finger import IndexFingerHandler
 from gesture_data import HandState
 from kalman_filter import KalmanHandler
-from utils.finger_drawer import FingerDrawer
+from finger_drawer import FingerDrawer
 class HandGestureHandler:
     def __init__(self):
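
The one-line fix above swaps the package-qualified import for a sibling import, matching the file's other imports (`from model import ...`, `from index_finger import ...`). That resolves when the scripts are launched from inside utils/, but would fail if the module were imported from the repository root. A small guard tolerating both launch styles is sketched below; it is a suggestion, not part of the commit:

    try:
        from finger_drawer import FingerDrawer        # launched from inside utils/
    except ImportError:
        from utils.finger_drawer import FingerDrawer  # imported from the repo root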

+1 -1  utils/index_finger.py

@@ -78,7 +78,7 @@ class IndexFingerHandler:
         contour = np.array(self.hand_state.trajectory[temp_handness], dtype=np.int32)
         rect = cv2.minAreaRect(contour)
         box = cv2.boxPoints(rect)
-        box = np.int0(box)
+        box = np.int64(box)
         # When there are more than four drag points, compute the minimum-area bounding rectangle
         self.hand_state.rect_draw_time[temp_handness] = time.time()
         self.hand_state.last_drawn_box[temp_handness] = box
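
Background for the `np.int0` → `np.int64` change here and in the notebook: as the DeprecationWarning captured in the notebook output states, `np.int0` is a deprecated alias for `np.intp` (deprecated since NumPy 1.24), and the alias was removed in NumPy 2.0. A minimal, self-contained sketch of the fixed code path, using a made-up trajectory (the sample points are illustrative only):

    import cv2
    import numpy as np

    trajectory = [(10, 12), (40, 15), (42, 60), (12, 58), (11, 30)]  # hypothetical points
    contour = np.array(trajectory, dtype=np.int32)
    rect = cv2.minAreaRect(contour)  # ((cx, cy), (w, h), angle)
    box = cv2.boxPoints(rect)        # four float corners of the rectangle
    box = np.int64(box)              # was np.int0; a plain integer cast works here
    print(box)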

+0 -278  utils/main.py

@@ -1,278 +0,0 @@
import cv2
import time
import mediapipe
import numpy as np
from collections import deque
from filterpy.kalman import KalmanFilter

gesture_locked = {'Left':False,'Right':False}
gesture_start_time = {'Left':0,'Right':0}
buffer_start_time = {'Left':0,'Right':0}
start_drag_time = {'Left':0,'Right':0}
dragging = {'Left':False,'Right':False}
drag_point = {'Left':(0, 0),'Right':(0, 0)}
buffer_duration = {'Left':0.25,'Right':0.25}
is_index_finger_up = {'Left':False,'Right':False}
index_finger_second = {'Left':0,'Right':0}
index_finger_tip = {'Left':0,'Right':0}
trajectory = {'Left':[],'Right':[]}
square_queue = deque()
wait_time = 1.5
kalman_wait_time = 0.5
wait_box = 2
rect_draw_time = {'Left':0,'Right':0}
last_drawn_box = {'Left':None,'Right':None}
elapsed_time = {'Left':0,'Right':0}

def clear_hand_states(detected_hand ='Both'):
    global gesture_locked, gesture_start_time, buffer_start_time, dragging, drag_point, buffer_duration,is_index_finger_up, trajectory,wait_time,kalman_wait_time, start_drag_time, rect_draw_time, last_drawn_box, wait_box, elapsed_time
    hands_to_clear = {'Left', 'Right'}
    if detected_hand == 'Both':
        hands_to_clear = hands_to_clear
    else:
        hands_to_clear -= {detected_hand}
    # Invert the detected hand to find which hand(s) to clear
    for h in hands_to_clear:
        gesture_locked[h] = False
        gesture_start_time[h] = 0
        buffer_start_time[h] = 0
        dragging[h] = False
        drag_point[h] = (0, 0)
        buffer_duration[h] = 0.25
        is_index_finger_up[h] = False
        trajectory[h].clear()
        start_drag_time[h] = 0
        rect_draw_time[h] = 0
        last_drawn_box[h] = None
        elapsed_time[h] = 0
    # Clear state for hands that were not detected

kalman_filters = {
    'Left': KalmanFilter(dim_x=4, dim_z=2),
    'Right': KalmanFilter(dim_x=4, dim_z=2)
}
for key in kalman_filters:
    kalman_filters[key].x = np.array([0., 0., 0., 0.])
    kalman_filters[key].F = np.array([[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]])
    # State-transition matrix
    kalman_filters[key].H = np.array([[1, 0, 0, 0], [0, 1, 0, 0]])
    # Observation matrix
    kalman_filters[key].P *= 1000.
    kalman_filters[key].R = 3
    kalman_filters[key].Q = np.eye(4) * 0.01

def kalman_filter_point(hand_label, x, y):
    kf = kalman_filters[hand_label]
    kf.predict()
    kf.update([x, y])
    # Update the state
    return (kf.x[0], kf.x[1])

def reset_kalman_filter(hand_label, x, y):
    kf = kalman_filters[hand_label]
    kf.x = np.array([x, y, 0., 0.])
    kf.P *= 1000.
    # Reset

mp_hands = mediapipe.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    # One hand is more stable
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)
mp_drawing = mediapipe.solutions.drawing_utils
clear_hand_states()

def process_image(image):
    start_time = time.time()
    height, width = image.shape[:2]
    image = cv2.flip(image, 1)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Preprocess the frame
    results = hands.process(image)
    if results.multi_hand_landmarks:
        # A hand was detected
        handness_str = ''
        index_finger_tip_str = ''
        if len(results.multi_hand_landmarks) == 1:
            clear_hand_states(detected_hand = results.multi_handedness[0].classification[0].label)
            # If only one hand is present, clear the other hand's state so stale data cannot cause instability later
        for hand_idx in range(len(results.multi_hand_landmarks)):
            hand_21 = results.multi_hand_landmarks[hand_idx]
            mp_drawing.draw_landmarks(image, hand_21, mp_hands.HAND_CONNECTIONS)
            temp_handness = results.multi_handedness[hand_idx].classification[0].label
            handness_str += '{}:{}, '.format(hand_idx, temp_handness)
            is_index_finger_up[temp_handness] = False
            # Set to False first so a lowered finger is not wrongly kept marked as raised
            cz0 = hand_21.landmark[0].z
            index_finger_second[temp_handness] = hand_21.landmark[7]
            index_finger_tip[temp_handness] = hand_21.landmark[8]
            # Index fingertip and the joint below it
            index_x, index_y = int(index_finger_tip[temp_handness].x * width), int(index_finger_tip[temp_handness].y * height)
            if all(index_finger_second[temp_handness].y < hand_21.landmark[i].y for i in range(21) if i not in [7, 8]) and index_finger_tip[temp_handness].y < index_finger_second[temp_handness].y:
                is_index_finger_up[temp_handness] = True
                # If the fingertip and second joint are higher than every other joint of the hand, treat it as a "pointing" gesture
            if is_index_finger_up[temp_handness]:
                if not gesture_locked[temp_handness]:
                    if gesture_start_time[temp_handness] == 0:
                        gesture_start_time[temp_handness] = time.time()
                        # Record when the index finger was raised
                    elif time.time() - gesture_start_time[temp_handness] > wait_time:
                        dragging[temp_handness] = True
                        gesture_locked[temp_handness] = True
                        drag_point[temp_handness] = (index_x, index_y)
                        # If the finger has been raised longer than the preset wait time, treat it as a pointing action
                buffer_start_time[temp_handness] = 0
                # Refresh the buffer time whenever the raised index finger is detected
            else:
                if buffer_start_time[temp_handness] == 0:
                    buffer_start_time[temp_handness] = time.time()
                elif time.time() - buffer_start_time[temp_handness] > buffer_duration[temp_handness]:
                    gesture_start_time[temp_handness] = 0
                    gesture_locked[temp_handness] = False
                    dragging[temp_handness] = False
                    # If the buffer time is exceeded, the pointing action really has ended
                    # This prevents a single misrecognised frame from wrongly cancelling the pointing action
            if dragging[temp_handness]:
                if start_drag_time[temp_handness] == 0:
                    start_drag_time[temp_handness] = time.time()
                    reset_kalman_filter(temp_handness, index_x, index_y)
                    # Initialise the filter each time a new stroke starts
                smooth_x, smooth_y = kalman_filter_point(temp_handness, index_x, index_y)
                drag_point[temp_handness] = (index_x, index_y)
                index_finger_radius = max(int(10 * (1 + (cz0 - index_finger_tip[temp_handness].z) * 5)), 0)
                cv2.circle(image, drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)
                # Draw a circle sized by the depth distance from the wrist
                # to show that the pointing action has started
                # Matches the depth-scaled points drawn below, using the same multiplier
                drag_point_smooth = (smooth_x, smooth_y)
                if time.time() - start_drag_time[temp_handness] > kalman_wait_time:
                    trajectory[temp_handness].append(drag_point_smooth)
                    # The Kalman filter is very unstable right after initialisation and the first frames usually carry heavy noise,
                    # so wait for those frames to pass before appending points to the trajectory
            else:
                if len(trajectory[temp_handness]) > 4:
                    contour = np.array(trajectory[temp_handness], dtype=np.int32)
                    rect = cv2.minAreaRect(contour)
                    box = cv2.boxPoints(rect)
                    box = np.int0(box)
                    rect_draw_time[temp_handness] = time.time()
                    last_drawn_box[temp_handness] = box
                    # If the pointing action has ended and the trajectory has at least four points,
                    # fit the irregular freehand stroke to its minimum-area bounding rectangle
                start_drag_time[temp_handness] = 0
                trajectory[temp_handness].clear()
            for i in range(1, len(trajectory[temp_handness])):
                pt1 = (int(trajectory[temp_handness][i-1][0]), int(trajectory[temp_handness][i-1][1]))
                pt2 = (int(trajectory[temp_handness][i][0]), int(trajectory[temp_handness][i][1]))
                cv2.line(image, pt1, pt2, (0, 0, 255), 2)
                # Draw the lines connecting the trajectory points
            if last_drawn_box[temp_handness] is not None:
                elapsed_time[temp_handness] = time.time() - rect_draw_time[temp_handness]
                if elapsed_time[temp_handness] < wait_box:
                    cv2.drawContours(image, [last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)
                    # Keep the rectangle on screen for a while; a single frame is too fast to see
                elif elapsed_time[temp_handness] >= wait_box - 0.1:
                    box = last_drawn_box[temp_handness]
                    x_min = max(0, min(box[:, 0]))
                    y_min = max(0, min(box[:, 1]))
                    x_max = min(image.shape[1], max(box[:, 0]))
                    y_max = min(image.shape[0], max(box[:, 1]))
                    cropped_image = image[y_min:y_max, x_min:x_max]
                    filename = f"../image/cropped_{temp_handness}_{int(time.time())}.jpg"
                    cv2.imwrite(filename, cropped_image)
                    last_drawn_box[temp_handness] = None
                    # Don't crop the frame the moment drawing ends, or the hand may wrongly end up in the crop;
                    # wait long enough for the hand to move away before extracting the rectangle from the frame
            for i in range(21):
                cx = int(hand_21.landmark[i].x * width)
                cy = int(hand_21.landmark[i].y * height)
                cz = hand_21.landmark[i].z
                depth_z = cz0 - cz
                radius = max(int(6 * (1 + depth_z*5)), 0)
                if i == 0:
                    image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)
                if i == 8:
                    image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)
                    index_finger_tip_str += '{}:{:.2f}, '.format(hand_idx, depth_z)
                if i in [1,5,9,13,17]:
                    image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)
                if i in [2,6,10,14,18]:
                    image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)
                if i in [3,7,11,15,19]:
                    image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)
                if i in [4,12,16,20]:
                    image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)
                # Draw each landmark with its assigned colour and a radius scaled by depth relative to the wrist
        scaler = 1
        image = cv2.putText(image, handness_str, (25*scaler, 100*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)
        image = cv2.putText(image, index_finger_tip_str, (25*scaler, 150*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)
        spend_time = time.time() - start_time
        if spend_time > 0:
            FPS = 1.0 / spend_time
        else:
            FPS = 0
        image = cv2.putText(image, 'FPS '+str(int(FPS)), (25*scaler, 50*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)
        # Show the FPS, the detected hands, and the fingertip depth relative to the wrist
    else:
        clear_hand_states()
        # If no hand is detected, clear all state
    return image

########################################
cap = cv2.VideoCapture(1)
cap.open(0)
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        print("Camera Error")
        break
    frame = process_image(frame)
    cv2.imshow('Video', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
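
For reference, the deleted script's point smoothing is a standard constant-velocity Kalman filter; the changed-files list suggests the same setup now lives in utils/kalman_filter.py. The matrices below are copied from the deleted code, while the wrapper function and the sample measurements are illustrative only:

    import numpy as np
    from filterpy.kalman import KalmanFilter

    def make_point_filter():
        kf = KalmanFilter(dim_x=4, dim_z=2)  # state [x, y, vx, vy], measurement [x, y]
        kf.x = np.array([0., 0., 0., 0.])
        kf.F = np.array([[1, 0, 1, 0],
                         [0, 1, 0, 1],
                         [0, 0, 1, 0],
                         [0, 0, 0, 1]])      # constant-velocity state transition
        kf.H = np.array([[1, 0, 0, 0],
                         [0, 1, 0, 0]])      # only position is observed
        kf.P *= 1000.                        # high initial uncertainty
        kf.R = 3                             # measurement noise
        kf.Q = np.eye(4) * 0.01              # process noise
        return kf

    kf = make_point_filter()
    for x, y in [(100, 100), (104, 101), (109, 103)]:  # made-up fingertip samples
        kf.predict()
        kf.update([x, y])
        print(kf.x[0], kf.x[1])              # smoothed position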
