load_other_pro

finalv3
ydw 2 months ago
parent commit 213792c936
50 changed files with 1935 additions and 0 deletions
  1. +21   -0   src_screenshot/LICENSE
  2. BIN        src_screenshot/image/cropped_Right_1753179393.jpg
  3. BIN        src_screenshot/image/cropped_Right_1753179532.jpg
  4. BIN        src_screenshot/image/cropped_Right_1753179605.jpg
  5. +0    -0   src_screenshot/main.py
  6. +127  -0   src_screenshot/utils/GUI.py
  7. +0    -0   src_screenshot/utils/__init__.py
  8. BIN        src_screenshot/utils/__pycache__/finger_drawer.cpython-312.pyc
  9. BIN        src_screenshot/utils/__pycache__/finger_drawer.cpython-38.pyc
 10. BIN        src_screenshot/utils/__pycache__/gesture_data.cpython-312.pyc
 11. BIN        src_screenshot/utils/__pycache__/gesture_data.cpython-38.pyc
 12. BIN        src_screenshot/utils/__pycache__/hand_gesture.cpython-312.pyc
 13. BIN        src_screenshot/utils/__pycache__/hand_gesture.cpython-38.pyc
 14. BIN        src_screenshot/utils/__pycache__/index_finger.cpython-312.pyc
 15. BIN        src_screenshot/utils/__pycache__/index_finger.cpython-38.pyc
 16. BIN        src_screenshot/utils/__pycache__/kalman_filter.cpython-312.pyc
 17. BIN        src_screenshot/utils/__pycache__/kalman_filter.cpython-38.pyc
 18. BIN        src_screenshot/utils/__pycache__/model.cpython-312.pyc
 19. BIN        src_screenshot/utils/__pycache__/model.cpython-38.pyc
 20. BIN        src_screenshot/utils/__pycache__/process_images.cpython-312.pyc
 21. BIN        src_screenshot/utils/__pycache__/process_images.cpython-38.pyc
 22. BIN        src_screenshot/utils/__pycache__/video_recognition.cpython-312.pyc
 23. BIN        src_screenshot/utils/__pycache__/video_recognition.cpython-38.pyc
 24. +34   -0   src_screenshot/utils/finger_drawer.py
 25. +43   -0   src_screenshot/utils/gesture_data.py
 26. +24   -0   src_screenshot/utils/gesture_process.py
 27. +437  -0   src_screenshot/utils/gesture_recognition.ipynb
 28. +56   -0   src_screenshot/utils/hand_gesture.py
 29. +112  -0   src_screenshot/utils/index_finger.py
 30. +36   -0   src_screenshot/utils/kalman_filter.py
 31. +17   -0   src_screenshot/utils/model.py
 32. +24   -0   src_screenshot/utils/process_images.py
 33. +65   -0   src_screenshot/utils/video_recognition.py
 34. +2    -0   src_voice_tip/README.md
 35. +3    -0   src_voice_tip/requirements.txt
 36. +220  -0   src_voice_tip/src/voice.py
 37. +6    -0   src_voice_tip/vosk-model-small-cn-0.22/README
 38. BIN        src_voice_tip/vosk-model-small-cn-0.22/am/final.mdl
 39. +8    -0   src_voice_tip/vosk-model-small-cn-0.22/conf/mfcc.conf
 40. +10   -0   src_voice_tip/vosk-model-small-cn-0.22/conf/model.conf
 41. BIN        src_voice_tip/vosk-model-small-cn-0.22/graph/Gr.fst
 42. BIN        src_voice_tip/vosk-model-small-cn-0.22/graph/HCLr.fst
 43. +39   -0   src_voice_tip/vosk-model-small-cn-0.22/graph/disambig_tid.int
 44. +646  -0   src_voice_tip/vosk-model-small-cn-0.22/graph/phones/word_boundary.int
 45. BIN        src_voice_tip/vosk-model-small-cn-0.22/ivector/final.dubm
 46. BIN        src_voice_tip/vosk-model-small-cn-0.22/ivector/final.ie
 47. BIN        src_voice_tip/vosk-model-small-cn-0.22/ivector/final.mat
 48. +3    -0   src_voice_tip/vosk-model-small-cn-0.22/ivector/global_cmvn.stats
 49. +0    -0   src_voice_tip/vosk-model-small-cn-0.22/ivector/online_cmvn.conf
 50. +2    -0   src_voice_tip/vosk-model-small-cn-0.22/ivector/splice.conf

+ 21
- 0
src_screenshot/LICENSE View File

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 EzraZephyr

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

BIN
src_screenshot/image/cropped_Right_1753179393.jpg View File

Width: 275  |  Height: 254  |  Size: 26 KiB

BIN
src_screenshot/image/cropped_Right_1753179532.jpg View File

Width: 268  |  Height: 257  |  Size: 15 KiB

BIN
src_screenshot/image/cropped_Right_1753179605.jpg View File

Width: 585  |  Height: 384  |  Size: 59 KiB

+ 0
- 0
src_screenshot/main.py View File


+ 127
- 0
src_screenshot/utils/GUI.py View File

@@ -0,0 +1,127 @@
import cv2
import tkinter as tk
from tkinter import filedialog, messagebox
from video_recognition import upload_and_process_video, show_frame
from process_images import HandGestureProcessor

current_mode = None
current_cap = None
# Track the current mode and the camera resource

# Main logic for building the GUI
def create_gui():
    try:
        print("Creating the GUI")
        root = tk.Tk()
        root.title("Gesture Recognition")
        root.geometry("800x600")
        print("GUI window created")
        canvas = tk.Canvas(root, width=640, height=480)
        canvas.pack()
        print("Canvas created")
        camera_button = tk.Button(
            root,
            text="Use Camera for Real-time Recognition",
            command=lambda: switch_to_camera(canvas)
        )
        camera_button.pack(pady=10)
        print("Camera button created")
        video_button = tk.Button(
            root,
            text="Upload Video File for Processing",
            command=lambda: select_and_process_video(canvas, root)
        )
        video_button.pack(pady=10)
        print("Video upload button created")
        print("GUI built, entering main loop")
        root.mainloop()
    except Exception as e:
        print(f"[ERROR] Exception while creating the GUI: {str(e)}")
        import traceback
        print(traceback.format_exc())

# Switch to real-time camera recognition mode
def switch_to_camera(canvas):
    global current_mode, current_cap
    stop_current_operation()
    # Stop the current operation and release the camera
    current_mode = "camera"
    canvas.delete("all")
    # Set the mode to camera and clear the canvas
    current_cap = cv2.VideoCapture(0)
    if not current_cap.isOpened():
        messagebox.showerror("Error", "Cannot open camera")
        current_mode = None
        return
    # Start the camera
    start_camera(canvas, current_cap)
    # Pass in the canvas and current_cap

# Switch to video-file processing mode
def select_and_process_video(canvas, root):
    global current_mode, current_cap
    stop_current_operation()
    current_mode = "video"
    canvas.delete("all")
    video_path = filedialog.askopenfilename(
        title="Select a Video File",
        filetypes=(("MP4 files", "*.mp4"), ("AVI files", "*.avi"), ("All files", "*.*"))
    )
    # Choose a video file
    if video_path:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            messagebox.showerror("Error", "Cannot open video file")
            return
        # Read the video's width and height, then resize the canvas to match
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        cap.release()
        canvas.config(width=frame_width, height=frame_height)
        root.geometry(f"{frame_width + 160}x{frame_height + 200}")  # Resize the window
        error_message = upload_and_process_video(canvas, video_path)
        if error_message:
            messagebox.showerror("Error", error_message)
        # Upload and process the video file

def stop_current_operation():
    global current_cap
    if current_cap and current_cap.isOpened():
        current_cap.release()
        cv2.destroyAllWindows()
    current_cap = None
# Stop the current operation: release the camera and close all windows

def start_camera(canvas, cap):
    # Unlike video_recognition.start_camera, this variant reuses an
    # already-opened capture instead of opening its own device
    if not cap.isOpened():
        return "Cannot open camera"
    gesture_processor = HandGestureProcessor()
    show_frame(canvas, cap, gesture_processor)
# Start the camera for real-time gesture recognition

if __name__ == "__main__":
    create_gui()

+ 0
- 0
src_screenshot/utils/__init__.py View File


BIN
src_screenshot/utils/__pycache__/finger_drawer.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/finger_drawer.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/gesture_data.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/gesture_data.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/hand_gesture.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/hand_gesture.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/index_finger.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/index_finger.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/kalman_filter.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/kalman_filter.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/model.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/model.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/process_images.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/process_images.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/video_recognition.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/video_recognition.cpython-38.pyc View File


+ 34
- 0
src_screenshot/utils/finger_drawer.py View File

@@ -0,0 +1,34 @@
import cv2

class FingerDrawer:
    @staticmethod
    def draw_finger_points(image, hand_21, temp_handness, width, height):
        cz0 = hand_21.landmark[0].z
        index_finger_tip_str = ''
        for i in range(21):
            cx = int(hand_21.landmark[i].x * width)
            cy = int(hand_21.landmark[i].y * height)
            cz = hand_21.landmark[i].z
            depth_z = cz0 - cz
            radius = max(int(6 * (1 + depth_z * 5)), 0)
            # Scale the dot radius by depth relative to the wrist
            if i == 0:
                image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)
            elif i == 8:
                image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)
                index_finger_tip_str += f'{temp_handness}:{depth_z:.2f}, '
            elif i in [1, 5, 9, 13, 17]:
                image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)
            elif i in [2, 6, 10, 14, 18]:
                image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)
            elif i in [3, 7, 11, 15, 19]:
                image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)
            elif i in [4, 12, 16, 20]:
                image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)
            # Draw each joint group in its own color, sized by its depth from the wrist
        return image, index_finger_tip_str
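For intuition, a tiny sketch (not part of the commit) of the depth-scaled radius used above; depth_z is the wrist z minus the landmark z, so positive values mean the landmark sits closer to the camera than the wrist:

for depth_z in (-0.10, 0.0, 0.05, 0.10):
    radius = max(int(6 * (1 + depth_z * 5)), 0)
    print(f"depth_z={depth_z:+.2f} -> radius={radius}px")
# -0.10 -> 3px, 0.00 -> 6px, +0.05 -> 7px, +0.10 -> 9px:
# landmarks nearer the camera draw larger, giving a rough 3D cue.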

+ 43
- 0
src_screenshot/utils/gesture_data.py View File

@@ -0,0 +1,43 @@
from collections import deque

class HandState:
    def __init__(self):
        self.gesture_locked = {'Left': False, 'Right': False}
        self.gesture_start_time = {'Left': 0, 'Right': 0}
        self.buffer_start_time = {'Left': 0, 'Right': 0}
        self.start_drag_time = {'Left': 0, 'Right': 0}
        self.dragging = {'Left': False, 'Right': False}
        self.drag_point = {'Left': (0, 0), 'Right': (0, 0)}
        self.buffer_duration = {'Left': 0.25, 'Right': 0.25}
        self.is_index_finger_up = {'Left': False, 'Right': False}
        self.index_finger_second = {'Left': 0, 'Right': 0}
        self.index_finger_tip = {'Left': 0, 'Right': 0}
        self.trajectory = {'Left': [], 'Right': []}
        self.square_queue = deque()
        self.wait_time = 1.5
        self.kalman_wait_time = 0.5
        self.wait_box = 2
        self.rect_draw_time = {'Left': 0, 'Right': 0}
        self.last_drawn_box = {'Left': None, 'Right': None}

    def clear_hand_states(self, detected_hand='Both'):
        hands_to_clear = {'Left', 'Right'}
        if detected_hand != 'Both':
            hands_to_clear -= {detected_hand}
            # Keep the detected hand; reset only the other one
        for h in hands_to_clear:
            self.gesture_locked[h] = False
            self.gesture_start_time[h] = 0
            self.buffer_start_time[h] = 0
            self.dragging[h] = False
            self.drag_point[h] = (0, 0)
            self.buffer_duration[h] = 0.25
            self.is_index_finger_up[h] = False
            self.trajectory[h].clear()
            self.start_drag_time[h] = 0
            self.rect_draw_time[h] = 0
            self.last_drawn_box[h] = None
# Left- and right-hand state is stored separately; sharing entries would
# let the two hands' data conflict
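A minimal usage sketch (assuming the module is on the import path) of the slightly inverted contract of clear_hand_states: the argument names the hand that was detected, and every other hand is reset:

from gesture_data import HandState

state = HandState()
state.dragging['Left'] = True
state.dragging['Right'] = True
state.clear_hand_states(detected_hand='Right')  # only the right hand was seen this frame
print(state.dragging)  # {'Left': False, 'Right': True}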

+ 24
- 0
src_screenshot/utils/gesture_process.py View File

@@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler

class HandGestureProcessor:
    def __init__(self):
        self.hand_handler = HandGestureHandler()

    def process_image(self, image, is_video=False):
        start_time = time.time()
        height, width = image.shape[:2]
        image = cv2.flip(image, 1)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Read the frame size, mirror the frame, and convert the color space
        image = self.hand_handler.handle_hand_gestures(image, width, height, is_video)
        # handle_hand_gestures requires is_video, so it is threaded through here
        spend_time = time.time() - start_time
        FPS = 1.0 / spend_time if spend_time > 0 else 0
        image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
        # Compute and overlay the frame rate
        return image

+ 437
- 0
src_screenshot/utils/gesture_recognition.ipynb View File

@@ -0,0 +1,437 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.761076Z",
"start_time": "2024-09-07T05:11:22.404354Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"import cv2\n",
"import time\n",
"import mediapipe\n",
"import numpy as np\n",
"from collections import deque\n",
"from filterpy.kalman import KalmanFilter"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "40aada17ccd31fe",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.777139Z",
"start_time": "2024-09-07T05:11:28.761076Z"
}
},
"outputs": [],
"source": [
"gesture_locked = {'Left':False,'Right':False}\n",
"gesture_start_time = {'Left':0,'Right':0}\n",
"buffer_start_time = {'Left':0,'Right':0}\n",
"start_drag_time = {'Left':0,'Right':0}\n",
"dragging = {'Left':False,'Right':False}\n",
"drag_point = {'Left':(0, 0),'Right':(0, 0)}\n",
"buffer_duration = {'Left':0.25,'Right':0.25}\n",
"is_index_finger_up = {'Left':False,'Right':False}\n",
"index_finger_second = {'Left':0,'Right':0}\n",
"index_finger_tip = {'Left':0,'Right':0}\n",
"trajectory = {'Left':[],'Right':[]}\n",
"square_queue = deque()\n",
"wait_time = 1.5\n",
"kalman_wait_time = 0.5\n",
"wait_box = 2\n",
"rect_draw_time = {'Left':0,'Right':0}\n",
"last_drawn_box = {'Left':None,'Right':None}\n",
"elapsed_time = {'Left':0,'Right':0}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2ee9323bb1c25cc0",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.824573Z",
"start_time": "2024-09-07T05:11:28.777139Z"
}
},
"outputs": [],
"source": [
"def clear_hand_states(detected_hand ='Both'):\n",
" global gesture_locked, gesture_start_time, buffer_start_time, dragging, drag_point, buffer_duration,is_index_finger_up, trajectory,wait_time,kalman_wait_time, start_drag_time, rect_draw_time, last_drawn_box, wait_box, elapsed_time\n",
" \n",
" hands_to_clear = {'Left', 'Right'}\n",
" if detected_hand == 'Both':\n",
" hands_to_clear = hands_to_clear\n",
" else:\n",
" hands_to_clear -= {detected_hand}\n",
" # 反向判断左右手\n",
"\n",
" for h in hands_to_clear:\n",
" gesture_locked[h] = False\n",
" gesture_start_time[h] = 0\n",
" buffer_start_time[h] = 0\n",
" dragging[h] = False\n",
" drag_point[h] = (0, 0)\n",
" buffer_duration[h] = 0.25\n",
" is_index_finger_up[h] = False\n",
" trajectory[h].clear()\n",
" start_drag_time[h] = 0\n",
" rect_draw_time[h] = 0\n",
" last_drawn_box[h] = None\n",
" elapsed_time[h] = 0\n",
" # 清空没被检测的手"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96cf431d2562e7d",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.855831Z",
"start_time": "2024-09-07T05:11:28.824573Z"
}
},
"outputs": [],
"source": [
"kalman_filters = {\n",
" 'Left': KalmanFilter(dim_x=4, dim_z=2),\n",
" 'Right': KalmanFilter(dim_x=4, dim_z=2)\n",
"}\n",
"\n",
"for key in kalman_filters:\n",
" kalman_filters[key].x = np.array([0., 0., 0., 0.])\n",
" kalman_filters[key].F = np.array([[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]])\n",
" # 状态转移矩阵\n",
" kalman_filters[key].H = np.array([[1, 0, 0, 0], [0, 1, 0, 0]])\n",
" # 观测矩阵\n",
" kalman_filters[key].P *= 1000.\n",
" kalman_filters[key].R = 3\n",
" kalman_filters[key].Q = np.eye(4) * 0.01\n",
"\n",
"def kalman_filter_point(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.predict()\n",
" kf.update([x, y])\n",
" # 更新状态\n",
" return (kf.x[0], kf.x[1])\n",
"\n",
"def reset_kalman_filter(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.x = np.array([x, y, 0., 0.])\n",
" kf.P *= 1000.\n",
" # 重置"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "edc274b7ed495122",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.887346Z",
"start_time": "2024-09-07T05:11:28.855831Z"
}
},
"outputs": [],
"source": [
"\n",
"mp_hands = mediapipe.solutions.hands\n",
"\n",
"hands = mp_hands.Hands(\n",
" static_image_mode=False,\n",
" max_num_hands=2,\n",
" # 一只更稳定\n",
" min_detection_confidence=0.5,\n",
" min_tracking_confidence=0.5\n",
")\n",
"\n",
"mp_drawing = mediapipe.solutions.drawing_utils\n",
"clear_hand_states()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51ff809ecaf1f899",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.934274Z",
"start_time": "2024-09-07T05:11:28.887346Z"
}
},
"outputs": [],
"source": [
"def process_image(image):\n",
"\n",
" start_time = time.time()\n",
" height, width = image.shape[:2]\n",
" image = cv2.flip(image, 1)\n",
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
" # 预处理帧\n",
" \n",
" results = hands.process(image)\n",
" \n",
" if results.multi_hand_landmarks:\n",
" # 如果检测到手\n",
" \n",
" handness_str = ''\n",
" index_finger_tip_str = ''\n",
" \n",
" if len(results.multi_hand_landmarks) == 1:\n",
" clear_hand_states(detected_hand = results.multi_handedness[0].classification[0].label)\n",
" # 如果只有一只手 则清空另一只手的数据 避免后续冲突导致不稳定\n",
" \n",
" for hand_idx in range(len(results.multi_hand_landmarks)):\n",
" \n",
" hand_21 = results.multi_hand_landmarks[hand_idx]\n",
" mp_drawing.draw_landmarks(image, hand_21, mp_hands.HAND_CONNECTIONS)\n",
" \n",
" temp_handness = results.multi_handedness[hand_idx].classification[0].label\n",
" handness_str += '{}:{}, '.format(hand_idx, temp_handness)\n",
" is_index_finger_up[temp_handness] = False\n",
" # 先设置为false 防止放下被错误更新为竖起\n",
" \n",
" cz0 = hand_21.landmark[0].z\n",
" index_finger_second[temp_handness] = hand_21.landmark[7]\n",
" index_finger_tip[temp_handness] = hand_21.landmark[8]\n",
" # 食指指尖和第一个关节\n",
" \n",
" index_x, index_y = int(index_finger_tip[temp_handness].x * width), int(index_finger_tip[temp_handness].y * height)\n",
"\n",
" if all(index_finger_second[temp_handness].y < hand_21.landmark[i].y for i in range(21) if i not in [7, 8]) and index_finger_tip[temp_handness].y < index_finger_second[temp_handness].y:\n",
" is_index_finger_up[temp_handness] = True\n",
" # 如果指尖和第二个关节高度大于整只手所有关节点 则视为执行“指向”操作 \n",
"\n",
" if is_index_finger_up[temp_handness]:\n",
" if not gesture_locked[temp_handness]:\n",
" if gesture_start_time[temp_handness] == 0:\n",
" gesture_start_time[temp_handness] = time.time()\n",
" # 记录食指抬起的时间\n",
" elif time.time() - gesture_start_time[temp_handness] > wait_time:\n",
" dragging[temp_handness] = True\n",
" gesture_locked[temp_handness] = True\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" # 如果食指抬起的时间大于预设的等待时间则视为执行“指向”操作\n",
" buffer_start_time[temp_handness] = 0\n",
" # 检测到食指竖起就刷新缓冲时间\n",
" else:\n",
" if buffer_start_time[temp_handness] == 0:\n",
" buffer_start_time[temp_handness] = time.time()\n",
" elif time.time() - buffer_start_time[temp_handness] > buffer_duration[temp_handness]:\n",
" gesture_start_time[temp_handness] = 0\n",
" gesture_locked[temp_handness] = False\n",
" dragging[temp_handness] = False\n",
" # 如果缓冲时间大于设定 就证明已经结束指向操作\n",
" # 这样可以防止某一帧识别有误导致指向操作被错误清除\n",
" \n",
" if dragging[temp_handness]:\n",
"\n",
" if start_drag_time[temp_handness] == 0:\n",
" start_drag_time[temp_handness] = time.time()\n",
" reset_kalman_filter(temp_handness, index_x, index_y)\n",
" # 每次画线的时候初始化滤波器\n",
" \n",
" smooth_x, smooth_y = kalman_filter_point(temp_handness, index_x, index_y)\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" index_finger_radius = max(int(10 * (1 + (cz0 - index_finger_tip[temp_handness].z) * 5)), 0)\n",
" cv2.circle(image, drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)\n",
" # 根据离掌根的深度距离来构建一个圆\n",
" # 用来显示已经开始指向操作\n",
" # 和下方构建的深度点位对应 直接用倍数\n",
" drag_point_smooth = (smooth_x, smooth_y)\n",
" \n",
" if time.time() - start_drag_time[temp_handness] > kalman_wait_time:\n",
" trajectory[temp_handness].append(drag_point_smooth)\n",
" # 因为kalman滤波器初始化的时候会很不稳定 前几帧通常会有较为严重的噪声\n",
" # 所以直接等待前几帧运行完成之后再将点位加到轨迹列表中\n",
" else:\n",
" if len(trajectory[temp_handness]) > 4:\n",
" contour = np.array(trajectory[temp_handness], dtype=np.int32)\n",
" rect = cv2.minAreaRect(contour)\n",
" box = cv2.boxPoints(rect)\n",
" box = np.int64(box)\n",
" rect_draw_time[temp_handness] = time.time()\n",
" last_drawn_box[temp_handness] = box\n",
" # 如果指向操作结束 轨迹列表有至少四个点的时候\n",
" # 使用最小包围图形将画的不规则图案调整为一个矩形\n",
"\n",
" start_drag_time[temp_handness] = 0\n",
" trajectory[temp_handness].clear()\n",
"\n",
" for i in range(1, len(trajectory[temp_handness])):\n",
"\n",
" pt1 = (int(trajectory[temp_handness][i-1][0]), int(trajectory[temp_handness][i-1][1]))\n",
" pt2 = (int(trajectory[temp_handness][i][0]), int(trajectory[temp_handness][i][1]))\n",
" cv2.line(image, pt1, pt2, (0, 0, 255), 2)\n",
" # 绘制连接轨迹点的线\n",
"\n",
" if last_drawn_box[temp_handness] is not None:\n",
" elapsed_time[temp_handness] = time.time() - rect_draw_time[temp_handness]\n",
" \n",
" if elapsed_time[temp_handness] < wait_box:\n",
" cv2.drawContours(image, [last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)\n",
" # 将矩形框保留一段时间 否则一帧太快 无法看清效果\n",
" \n",
" elif elapsed_time[temp_handness] >= wait_box - 0.1:\n",
" \n",
" box = last_drawn_box[temp_handness]\n",
" x_min = max(0, min(box[:, 0]))\n",
" y_min = max(0, min(box[:, 1]))\n",
" x_max = min(image.shape[1], max(box[:, 0]))\n",
" y_max = min(image.shape[0], max(box[:, 1]))\n",
" cropped_image = image[y_min:y_max, x_min:x_max]\n",
" filename = f\"../image/cropped_{temp_handness}_{int(time.time())}.jpg\"\n",
" cv2.imwrite(filename, cropped_image)\n",
" last_drawn_box[temp_handness] = None\n",
" # 不能直接剪裁画完的图像 可能会错误的将手剪裁进去\n",
" # 等待一段时间 有一个给手缓冲移动走的时间再将这一帧里的矩形提取出来\n",
" \n",
" for i in range(21):\n",
" \n",
" cx = int(hand_21.landmark[i].x * width)\n",
" cy = int(hand_21.landmark[i].y * height)\n",
" cz = hand_21.landmark[i].z\n",
" depth_z = cz0 - cz\n",
" radius = max(int(6 * (1 + depth_z*5)), 0)\n",
" \n",
" if i == 0:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)\n",
" if i == 8:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)\n",
" index_finger_tip_str += '{}:{:.2f}, '.format(hand_idx, depth_z)\n",
" if i in [1,5,9,13,17]: \n",
" image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)\n",
" if i in [2,6,10,14,18]:\n",
" image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)\n",
" if i in [3,7,11,15,19]:\n",
" image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)\n",
" if i in [4,12,16,20]:\n",
" image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)\n",
" # 提取出每一个关节点 赋予对应的颜色和根据掌根的深度\n",
" \n",
" scaler= 1\n",
" image = cv2.putText(image,handness_str, (25*scaler, 100*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)\n",
" image = cv2.putText(image,index_finger_tip_str, (25*scaler, 150*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)\n",
"\n",
" spend_time = time.time() - start_time\n",
" if spend_time > 0:\n",
" FPS = 1.0 / spend_time\n",
" else:\n",
" FPS = 0\n",
" \n",
" image = cv2.putText(image,'FPS '+str(int(FPS)),(25*scaler,50*scaler),cv2.FONT_HERSHEY_SIMPLEX,1.25*scaler,(0,0,255),2,)\n",
" # 显示FPS 检测到的手和食指指尖对于掌根的深度值\n",
" \n",
" else:\n",
" clear_hand_states()\n",
" # 如果没检测到手就清空全部信息\n",
" \n",
" return image"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7ce23e80ed36041",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:19:32.248575Z",
"start_time": "2024-09-07T05:11:28.934663Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\25055\\AppData\\Local\\Temp\\ipykernel_4200\\752492595.py:89: DeprecationWarning: `np.int0` is a deprecated alias for `np.intp`. (Deprecated NumPy 1.24)\n",
" box = np.int0(box)\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[7], line 10\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCamera Error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 8\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m---> 10\u001b[0m frame \u001b[38;5;241m=\u001b[39m \u001b[43mprocess_image\u001b[49m\u001b[43m(\u001b[49m\u001b[43mframe\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 11\u001b[0m cv2\u001b[38;5;241m.\u001b[39mimshow(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVideo\u001b[39m\u001b[38;5;124m'\u001b[39m, frame)\n\u001b[0;32m 13\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cv2\u001b[38;5;241m.\u001b[39mwaitKey(\u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m&\u001b[39m \u001b[38;5;241m0xFF\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mord\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mq\u001b[39m\u001b[38;5;124m'\u001b[39m):\n",
"Cell \u001b[1;32mIn[6], line 9\u001b[0m, in \u001b[0;36mprocess_image\u001b[1;34m(image)\u001b[0m\n\u001b[0;32m 6\u001b[0m image \u001b[38;5;241m=\u001b[39m cv2\u001b[38;5;241m.\u001b[39mcvtColor(image, cv2\u001b[38;5;241m.\u001b[39mCOLOR_BGR2RGB)\n\u001b[0;32m 7\u001b[0m \u001b[38;5;66;03m# 预处理帧\u001b[39;00m\n\u001b[1;32m----> 9\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mhands\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m results\u001b[38;5;241m.\u001b[39mmulti_hand_landmarks:\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# 如果检测到手\u001b[39;00m\n\u001b[0;32m 14\u001b[0m handness_str \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\n",
"File \u001b[1;32md:\\app-install-dict\\Anaconda3\\envs\\software_engineering\\lib\\site-packages\\mediapipe\\python\\solutions\\hands.py:153\u001b[0m, in \u001b[0;36mHands.process\u001b[1;34m(self, image)\u001b[0m\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mprocess\u001b[39m(\u001b[38;5;28mself\u001b[39m, image: np\u001b[38;5;241m.\u001b[39mndarray) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NamedTuple:\n\u001b[0;32m 133\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Processes an RGB image and returns the hand landmarks and handedness of each detected hand.\u001b[39;00m\n\u001b[0;32m 134\u001b[0m \n\u001b[0;32m 135\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;124;03m right hand) of the detected hand.\u001b[39;00m\n\u001b[0;32m 151\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 153\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mimage\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mimage\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32md:\\app-install-dict\\Anaconda3\\envs\\software_engineering\\lib\\site-packages\\mediapipe\\python\\solution_base.py:335\u001b[0m, in \u001b[0;36mSolutionBase.process\u001b[1;34m(self, input_data)\u001b[0m\n\u001b[0;32m 329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 330\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_graph\u001b[38;5;241m.\u001b[39madd_packet_to_input_stream(\n\u001b[0;32m 331\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream_name,\n\u001b[0;32m 332\u001b[0m packet\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_packet(input_stream_type,\n\u001b[0;32m 333\u001b[0m data)\u001b[38;5;241m.\u001b[39mat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_simulated_timestamp))\n\u001b[1;32m--> 335\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_graph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait_until_idle\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 336\u001b[0m \u001b[38;5;66;03m# Create a NamedTuple object where the field names are mapping to the graph\u001b[39;00m\n\u001b[0;32m 337\u001b[0m \u001b[38;5;66;03m# output stream names.\u001b[39;00m\n\u001b[0;32m 338\u001b[0m solution_outputs \u001b[38;5;241m=\u001b[39m collections\u001b[38;5;241m.\u001b[39mnamedtuple(\n\u001b[0;32m 339\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSolutionOutputs\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_output_stream_type_info\u001b[38;5;241m.\u001b[39mkeys())\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m在当前单元格或上一个单元格中执行代码时 Kernel 崩溃。\n",
"\u001b[1;31m请查看单元格中的代码,以确定故障的可能原因。\n",
"\u001b[1;31m单击<a href='https://aka.ms/vscodeJupyterKernelCrash'>此处</a>了解详细信息。\n",
"\u001b[1;31m有关更多详细信息,请查看 Jupyter <a href='command:jupyter.viewOutput'>log</a>。"
]
}
],
"source": [
"cap = cv2.VideoCapture(1)\n",
"cap.open(0)\n",
"\n",
"while cap.isOpened():\n",
" success, frame = cap.read()\n",
" if not success:\n",
" print(\"Camera Error\")\n",
" break\n",
" \n",
" frame = process_image(frame)\n",
" cv2.imshow('Video', frame)\n",
" \n",
" if cv2.waitKey(1) & 0xFF == ord('q'):\n",
" break\n",
" \n",
"cap.release()\n",
"cv2.destroyAllWindows() "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10fca4bc34a944ea",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "software_engineering",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.20"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

+ 56
- 0
src_screenshot/utils/hand_gesture.py View File

@@ -0,0 +1,56 @@
import cv2
from model import HandTracker
from index_finger import IndexFingerHandler
from gesture_data import HandState
from kalman_filter import KalmanHandler
from finger_drawer import FingerDrawer

class HandGestureHandler:
    def __init__(self):
        self.hand_state = HandState()
        self.kalman_handler = KalmanHandler()
        self.hand_tracker = HandTracker()
        self.index_handler = IndexFingerHandler(self.hand_state, self.kalman_handler)

    def handle_hand_gestures(self, image, width, height, is_video):
        results = self.hand_tracker.process(image)
        if results.multi_hand_landmarks:
            handness_str = ''
            index_finger_tip_str = ''
            if len(results.multi_hand_landmarks) == 1:
                detected_hand = results.multi_handedness[0].classification[0].label
                self.hand_state.clear_hand_states(detected_hand)
                # With only one hand detected, clear the other hand's state so its
                # stale data cannot conflict when a second hand appears
            for hand_idx, hand_21 in enumerate(results.multi_hand_landmarks):
                self.hand_tracker.mp_drawing.draw_landmarks(
                    image, hand_21, self.hand_tracker.mp_hands.HAND_CONNECTIONS
                )
                # Draw the hand-landmark connections
                temp_handness = results.multi_handedness[hand_idx].classification[0].label
                handness_str += f'{hand_idx}:{temp_handness}, '
                self.hand_state.is_index_finger_up[temp_handness] = False
                image = self.index_handler.handle_index_finger(
                    image, hand_21, temp_handness, width, height
                )
                # Handle the index-finger gesture
                image, index_finger_tip_str = FingerDrawer.draw_finger_points(image, hand_21, temp_handness, width, height)
            if is_video:
                image = cv2.flip(image, 1)
            image = cv2.putText(image, handness_str, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
            image = cv2.putText(image, index_finger_tip_str, (25, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
        else:
            if is_video:
                image = cv2.flip(image, 1)
            # Video from a rear camera is flipped before processing so the left/right
            # handedness labels come out correctly; after processing it is flipped
            # back so the final output is not mirrored.
            self.hand_state.clear_hand_states()
            # Clear all hand state when no hand is detected
        return image

+ 112
- 0
src_screenshot/utils/index_finger.py View File

@@ -0,0 +1,112 @@
import cv2
import time
import numpy as np

class IndexFingerHandler:
    def __init__(self, hand_state, kalman_handler):
        self.hand_state = hand_state
        self.kalman_handler = kalman_handler
        self.wait_time = 1.5
        self.kalman_wait_time = 0.5
        self.wait_box = 2

    def handle_index_finger(self, image, hand_21, temp_handness, width, height):
        cz0 = hand_21.landmark[0].z
        self.hand_state.index_finger_second[temp_handness] = hand_21.landmark[7]
        self.hand_state.index_finger_tip[temp_handness] = hand_21.landmark[8]
        index_x = int(self.hand_state.index_finger_tip[temp_handness].x * width)
        index_y = int(self.hand_state.index_finger_tip[temp_handness].y * height)
        self.update_index_finger_state(hand_21, temp_handness, index_x, index_y)
        self.draw_index_finger_gesture(image, temp_handness, index_x, index_y, cz0)
        return image
    # Track the index finger's state, render the gesture effects, and return the image

    def update_index_finger_state(self, hand_21, temp_handness, index_x, index_y):
        if all(self.hand_state.index_finger_second[temp_handness].y < hand_21.landmark[i].y
               for i in range(21) if i not in [7, 8]) and \
                self.hand_state.index_finger_tip[temp_handness].y < self.hand_state.index_finger_second[temp_handness].y:
            self.hand_state.is_index_finger_up[temp_handness] = True
        # If the fingertip and the joint below it sit above every other landmark,
        # the index finger counts as raised
        if self.hand_state.is_index_finger_up[temp_handness]:
            if not self.hand_state.gesture_locked[temp_handness]:
                if self.hand_state.gesture_start_time[temp_handness] == 0:
                    self.hand_state.gesture_start_time[temp_handness] = time.time()
                elif time.time() - self.hand_state.gesture_start_time[temp_handness] > self.wait_time:
                    self.hand_state.dragging[temp_handness] = True
                    self.hand_state.gesture_locked[temp_handness] = True
                    self.hand_state.drag_point[temp_handness] = (index_x, index_y)
                    # Once the finger has stayed up longer than wait_time, lock in the pointing gesture
            self.hand_state.buffer_start_time[temp_handness] = 0
            # Refresh the buffer timer whenever the raised finger is seen; it keeps a
            # brief misdetection from ending the gesture
        else:
            if self.hand_state.buffer_start_time[temp_handness] == 0:
                self.hand_state.buffer_start_time[temp_handness] = time.time()
            elif time.time() - self.hand_state.buffer_start_time[temp_handness] > self.hand_state.buffer_duration[temp_handness]:
                self.hand_state.gesture_start_time[temp_handness] = 0
                self.hand_state.gesture_locked[temp_handness] = False
                self.hand_state.dragging[temp_handness] = False
                # End the pointing gesture only after the interruption outlasts the buffer window

    def draw_index_finger_gesture(self, image, temp_handness, index_x, index_y, cz0):
        if self.hand_state.dragging[temp_handness]:
            if self.hand_state.start_drag_time[temp_handness] == 0:
                self.hand_state.start_drag_time[temp_handness] = time.time()
                self.kalman_handler.reset_kalman_filter(temp_handness, index_x, index_y)
                # On the first frame of a stroke, record the time and reset the Kalman filter
            smooth_x, smooth_y = self.kalman_handler.kalman_filter_point(temp_handness, index_x, index_y)
            # Smooth the trajectory with the Kalman filter to cut noise and jitter
            self.hand_state.drag_point[temp_handness] = (index_x, index_y)
            index_finger_radius = max(int(10 * (1 + (cz0 - self.hand_state.index_finger_tip[temp_handness].z) * 5)), 0)
            cv2.circle(image, self.hand_state.drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)
            # Size the circle by distance from the wrist, slightly larger than the
            # FingerDrawer dots so the locked pointing state is easy to see
            drag_point_smooth = (smooth_x, smooth_y)
            if time.time() - self.hand_state.start_drag_time[temp_handness] > self.kalman_wait_time:
                self.hand_state.trajectory[temp_handness].append(drag_point_smooth)
                # The filter needs a moment to stabilize, so points are only
                # appended to the trajectory after kalman_wait_time has passed
        else:
            if len(self.hand_state.trajectory[temp_handness]) > 4:
                contour = np.array(self.hand_state.trajectory[temp_handness], dtype=np.int32)
                rect = cv2.minAreaRect(contour)
                box = cv2.boxPoints(rect)
                box = np.int64(box)
                # With more than four trajectory points, compute the minimum-area bounding rectangle
                self.hand_state.rect_draw_time[temp_handness] = time.time()
                self.hand_state.last_drawn_box[temp_handness] = box
            self.hand_state.start_drag_time[temp_handness] = 0
            self.hand_state.trajectory[temp_handness].clear()
            # Reset the stroke timer and clear the trajectory
        for i in range(1, len(self.hand_state.trajectory[temp_handness])):
            pt1 = (int(self.hand_state.trajectory[temp_handness][i-1][0]), int(self.hand_state.trajectory[temp_handness][i-1][1]))
            pt2 = (int(self.hand_state.trajectory[temp_handness][i][0]), int(self.hand_state.trajectory[temp_handness][i][1]))
            cv2.line(image, pt1, pt2, (0, 0, 255), 2)
            # Draw the drag path
        if self.hand_state.last_drawn_box[temp_handness] is not None:
            elapsed_time = time.time() - self.hand_state.rect_draw_time[temp_handness]
            if elapsed_time < self.wait_box:
                cv2.drawContours(image, [self.hand_state.last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)
                # Keep the bounding box visible for a while so it can actually be seen
            elif elapsed_time >= self.wait_box - 0.1:
                box = self.hand_state.last_drawn_box[temp_handness]
                x_min = max(0, min(box[:, 0]))
                y_min = max(0, min(box[:, 1]))
                x_max = min(image.shape[1], max(box[:, 0]))
                y_max = min(image.shape[0], max(box[:, 1]))
                cropped_image = image[y_min:y_max, x_min:x_max]
                filename = f"../image/cropped_{temp_handness}_{int(time.time())}.jpg"
                cv2.imwrite(filename, cropped_image)
                self.hand_state.last_drawn_box[temp_handness] = None
                # Cropping right after the stroke could catch the hand inside the box,
                # so the crop runs only as the box expires, giving the hand time to move away
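A standalone sketch (not from the commit) of the screenshot step at the end of draw_index_finger_gesture: an irregular fingertip trajectory is reduced to its minimum-area rectangle, clamped to the frame, and cropped axis-aligned:

import numpy as np
import cv2

image = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in frame
trajectory = [(100, 120), (220, 90), (260, 210), (130, 240), (90, 180)]

contour = np.array(trajectory, dtype=np.int32)
rect = cv2.minAreaRect(contour)          # (center, (w, h), angle)
box = np.int64(cv2.boxPoints(rect))      # the 4 rotated corner points

x_min, y_min = max(0, box[:, 0].min()), max(0, box[:, 1].min())
x_max = min(image.shape[1], box[:, 0].max())
y_max = min(image.shape[0], box[:, 1].max())
cropped = image[y_min:y_max, x_min:x_max]
print(cropped.shape)  # height x width of the screenshot region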

+ 36
- 0
src_screenshot/utils/kalman_filter.py View File

@@ -0,0 +1,36 @@
import numpy as np
from filterpy.kalman import KalmanFilter

class KalmanHandler:
    def __init__(self):
        self.kalman_filters = {
            'Left': KalmanFilter(dim_x=4, dim_z=2),
            'Right': KalmanFilter(dim_x=4, dim_z=2)
        }
        for key in self.kalman_filters:
            self.kalman_filters[key].x = np.array([0., 0., 0., 0.])
            self.kalman_filters[key].F = np.array([[1, 0, 1, 0],
                                                   [0, 1, 0, 1],
                                                   [0, 0, 1, 0],
                                                   [0, 0, 0, 1]])  # state-transition matrix
            self.kalman_filters[key].H = np.array([[1, 0, 0, 0],
                                                   [0, 1, 0, 0]])  # measurement matrix
            self.kalman_filters[key].P *= 1000.
            self.kalman_filters[key].R = 3
            self.kalman_filters[key].Q = np.eye(4) * 0.01
            # These parameters were tuned through repeated testing and behave stably

    def kalman_filter_point(self, hand_label, x, y):
        kf = self.kalman_filters[hand_label]
        kf.predict()
        kf.update([x, y])
        # Predict, then update with the new measurement
        return (kf.x[0], kf.x[1])

    def reset_kalman_filter(self, hand_label, x, y):
        kf = self.kalman_filters[hand_label]
        kf.x = np.array([x, y, 0., 0.])
        kf.P *= 1000.
        # Reset the state to the new starting point
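A quick smoothing demo (an illustrative sketch, assuming kalman_filter.py is on the import path): feed a jittery fingertip track through KalmanHandler and compare the raw points against the filtered output:

import random
from kalman_filter import KalmanHandler

handler = KalmanHandler()
handler.reset_kalman_filter('Right', 100, 100)  # start at the first detection

for t in range(10):
    # Simulated fingertip moving right with +/-5 px of jitter
    raw_x = 100 + 10 * t + random.uniform(-5, 5)
    raw_y = 100 + random.uniform(-5, 5)
    smooth_x, smooth_y = handler.kalman_filter_point('Right', raw_x, raw_y)
    print(f"raw=({raw_x:6.1f}, {raw_y:6.1f})  smooth=({smooth_x:6.1f}, {smooth_y:6.1f})")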

+ 17
- 0
src_screenshot/utils/model.py View File

@@ -0,0 +1,17 @@
import mediapipe as mp

class HandTracker:
    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            # A single hand tracks more stably
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
        self.mp_drawing = mp.solutions.drawing_utils

    def process(self, image):
        results = self.hands.process(image)
        return results
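A minimal single-frame sketch (assuming model.py is importable; the image file is hypothetical and not in the repo) showing how HandTracker.process output is consumed:

import cv2
from model import HandTracker

tracker = HandTracker()
bgr = cv2.imread("hand.jpg")  # hypothetical test photo
if bgr is None:
    raise SystemExit("place a test photo named hand.jpg next to this script")
results = tracker.process(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))  # mediapipe expects RGB
if results.multi_hand_landmarks:
    wrist = results.multi_hand_landmarks[0].landmark[0]
    print(f"wrist at ({wrist.x:.2f}, {wrist.y:.2f}) in normalized image coordinates")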

+ 24
- 0
src_screenshot/utils/process_images.py View File

@@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler

class HandGestureProcessor:
    def __init__(self):
        self.hand_handler = HandGestureHandler()

    def process_image(self, image, is_video):
        start_time = time.time()
        height, width = image.shape[:2]
        image = cv2.flip(image, 1)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Preprocess the incoming frame
        image = self.hand_handler.handle_hand_gestures(image, width, height, is_video)
        spend_time = time.time() - start_time
        FPS = 1.0 / spend_time if spend_time > 0 else 0
        image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
        # Compute and overlay the frame rate
        return image

+ 65
- 0
src_screenshot/utils/video_recognition.py View File

@@ -0,0 +1,65 @@
import cv2
from process_images import HandGestureProcessor
from tkinter import messagebox
from PIL import Image, ImageTk

def start_camera(canvas):
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        return "Cannot open camera"
    gesture_processor = HandGestureProcessor()
    show_frame(canvas, cap, gesture_processor)

def show_frame(canvas, cap, gesture_processor):
    success, frame = cap.read()
    if success:
        processed_frame = gesture_processor.process_image(frame, False)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        # Process the frame, convert it to RGB, and draw it on the canvas
        canvas.after(10, show_frame, canvas, cap, gesture_processor)
        # Reschedule itself so every subsequent frame is processed and shown
    else:
        cap.release()
        cv2.destroyAllWindows()

def upload_and_process_video(canvas, video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Cannot open video file"
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Read the video's parameters
    output_filename = "../video/processed_output.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # 'mp4v' matches the .mp4 container ('XVID' is conventionally an AVI codec)
    out = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))
    # Set the output path and encoding
    gesture_processor = HandGestureProcessor()
    process_video_frame(canvas, cap, gesture_processor, out)

def process_video_frame(canvas, cap, gesture_processor, out):
    success, frame = cap.read()
    if success:
        processed_frame = gesture_processor.process_image(frame, True)
        out.write(processed_frame)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        canvas.after(10, process_video_frame, canvas, cap, gesture_processor, out)
    else:
        cap.release()
        out.release()
        cv2.destroyAllWindows()
        messagebox.showinfo("Info", "Processed video saved as processed_output.mp4")
        print("Processed video saved as processed_output.mp4")

+ 2
- 0
src_voice_tip/README.md View File

@@ -0,0 +1,2 @@
A Python-based real-time voice subtitle program that converts the user's speech into on-screen subtitle text as it is spoken. Supports both Chinese and English recognition.

+ 3
- 0
src_voice_tip/requirements.txt View File

@@ -0,0 +1,3 @@
vosk
sounddevice
numpy

+ 220
- 0
src_voice_tip/src/voice.py View File

@@ -0,0 +1,220 @@
import tkinter as tk
import threading
import queue
import time
import json
import sounddevice as sd
import numpy as np
from vosk import Model, KaldiRecognizer
import os
import platform

class VoiceSubtitleApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Real-time Voice Subtitles")
        # Window attributes: always on top, borderless, transparent
        if platform.system() == 'Darwin':  # macOS
            self.root.attributes('-topmost', 1)
            self.root.attributes('-alpha', 1.0)
            self.root.attributes('-transparent', True)
            self.root.configure(bg='black')  # black background
            self.root.wm_attributes('-transparent', True)
            self.root.update_idletasks()
            self.root.lift()
        else:  # Windows and other systems
            self.root.attributes('-topmost', True)
            self.root.attributes('-alpha', 1.0)
            self.root.configure(bg='black')
            self.root.overrideredirect(True)  # borderless mode
        # Window size and position
        self.window_width = 800
        self.window_height = 100
        screen_width = root.winfo_screenwidth()
        screen_height = root.winfo_screenheight()
        x = (screen_width - self.window_width) // 2
        y = screen_height - self.window_height - 100
        self.root.geometry(f"{self.window_width}x{self.window_height}+{x}+{y}")
        # Subtitle label
        self.text_label = tk.Label(
            root,
            text="",  # starts empty
            font=("Arial", 24, "bold"),
            fg="white",
            bg='black',  # black background
            wraplength=780,
            highlightthickness=0,
            borderwidth=0
        )
        self.text_label.pack(expand=True, fill='both', padx=10)
        # Dragging support
        self.text_label.bind('<Button-1>', self.start_move)
        self.text_label.bind('<B1-Motion>', self.on_move)
        # Right-click to quit
        self.text_label.bind('<Button-3>', lambda e: self.on_closing())
        self.is_running = True
        self.audio_queue = queue.Queue()
        self.partial_result = ""
        self.last_voice_time = time.time()
        try:
            # Initialize the Vosk model
            print("Loading the speech recognition model...")
            model_path = "../vosk-model-cn-0.22"
            if not os.path.exists(model_path):
                model_path = "../vosk-model-small-cn-0.22"
                print("Medium model not found; using the small model")
            if not os.path.exists(model_path):
                print(f"Error: model folder {model_path} not found")
                self.update_subtitle(f"Error: model folder {model_path} not found")
                return
            print(f"Using model: {model_path}")
            self.model = Model(model_path)
            self.recognizer = KaldiRecognizer(self.model, 16000)
            self.recognizer.SetMaxAlternatives(0)
            self.recognizer.SetWords(True)
            print("Model loaded")
            # List the available audio devices
            devices = sd.query_devices()
            print("Available audio devices:")
            for i, device in enumerate(devices):
                print(f"{i}: {device['name']}")
            # Use the default input device
            default_input = sd.query_devices(kind='input')
            print(f"Using default input device: {default_input['name']}")
            # Start the audio-capture thread
            self.audio_thread = threading.Thread(target=self.process_audio)
            self.audio_thread.daemon = True
            self.audio_thread.start()
            # Start the recognition thread
            self.recognition_thread = threading.Thread(target=self.recognize_speech)
            self.recognition_thread.daemon = True
            self.recognition_thread.start()
        except Exception as e:
            print(f"Initialization error: {str(e)}")
            self.update_subtitle(f"Initialization failed: {str(e)}")
            return
    def start_move(self, event):
        """Begin dragging the window"""
        self.x = event.x
        self.y = event.y

    def on_move(self, event):
        """Handle window dragging"""
        deltax = event.x - self.x
        deltay = event.y - self.y
        x = self.root.winfo_x() + deltax
        y = self.root.winfo_y() + deltay
        self.root.geometry(f"+{x}+{y}")

    def audio_callback(self, indata, frames, time, status):
        """Audio stream callback"""
        if status:
            print(status)
        self.audio_queue.put(bytes(indata))

    def process_audio(self):
        """Run the audio input stream"""
        try:
            with sd.RawInputStream(samplerate=16000, channels=1, dtype='int16',
                                   blocksize=4000,
                                   device=None,
                                   callback=self.audio_callback):
                print("Recording started...")
                while self.is_running:
                    time.sleep(0.05)
                    self.root.after(0, self.fade_out_text)
        except Exception as e:
            print(f"Audio processing error: {str(e)}")
            self.root.after(0, self.update_subtitle, f"Audio processing error: {str(e)}")

    def recognize_speech(self):
        """Speech recognition loop"""
        print("Recognition started...")
        while self.is_running:
            try:
                audio_data = self.audio_queue.get(timeout=0.5)
                if len(audio_data) == 0:
                    continue
                if self.recognizer.AcceptWaveform(audio_data):
                    result = json.loads(self.recognizer.Result())
                    text = result.get("text", "").strip()
                    if text:
                        print(f"Final result: {text}")
                        self.last_voice_time = time.time()
                        self.root.after(0, self.update_subtitle, text)
                else:
                    partial = json.loads(self.recognizer.PartialResult())
                    partial_text = partial.get("partial", "").strip()
                    if partial_text and partial_text != self.partial_result:
                        self.partial_result = partial_text
                        print(f"Partial result: {partial_text}")
                        self.last_voice_time = time.time()
                        self.root.after(0, self.update_subtitle, partial_text)
            except queue.Empty:
                continue
            except Exception as e:
                print(f"Recognition error: {str(e)}")
                time.sleep(0.1)

    def fade_out_text(self):
        """Fade the subtitle text out"""
        try:
            if time.time() - self.last_voice_time > 3:  # start fading after 3 s without input
                current_color = self.text_label.cget('fg')
                if current_color == 'white':  # fully opaque
                    self.text_label.configure(fg='#FFFFFF')  # switch to an explicit hex value
                else:
                    # Parse the current color value
                    color = current_color.lstrip('#')
                    if len(color) == 6:  # make sure it is a valid hex color
                        # Darken toward the black background, which reads as lowering opacity
                        new_alpha = max(0, int(color[0:2], 16) - 15)
                        if new_alpha > 0:  # not yet fully faded
                            new_color = f'#{new_alpha:02x}{new_alpha:02x}{new_alpha:02x}'
                            self.text_label.configure(fg=new_color)
                            self.root.after(50, self.fade_out_text)  # keep fading
                        else:
                            # Clear the text once fully faded
                            self.text_label.config(text="")
                            self.text_label.update()
        except Exception as e:
            print(f"Fade effect error: {str(e)}")

    def update_subtitle(self, text):
        """Update the subtitle text"""
        if not text:
            return
        self.text_label.config(text=text)
        self.text_label.configure(fg='white')
        self.text_label.update()
        self.last_voice_time = time.time()

    def on_closing(self):
        self.is_running = False
        self.root.destroy()

if __name__ == "__main__":
    root = tk.Tk()
    app = VoiceSubtitleApp(root)
    root.mainloop()
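For reference, a small sketch (not part of the commit) of the arithmetic behind fade_out_text: each 50 ms tick subtracts 15 from the 8-bit grayscale value, so a fully white subtitle fades to black in roughly 0.85 s:

alpha = 0xFF  # '#FFFFFF' -> fully white
steps = 0
while alpha > 0:
    alpha = max(0, alpha - 15)
    steps += 1
print(steps)  # 17 ticks x 50 ms ~= 0.85 s from white to invisible-on-black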

+ 6
- 0
src_voice_tip/vosk-model-small-cn-0.22/README View File

@@ -0,0 +1,6 @@
Chinese Vosk model for mobile
CER results
23.54% speechio_02
38.29% speechio_06

BIN
src_voice_tip/vosk-model-small-cn-0.22/am/final.mdl View File


+ 8
- 0
src_voice_tip/vosk-model-small-cn-0.22/conf/mfcc.conf View File

@@ -0,0 +1,8 @@
--use-energy=false
--sample-frequency=16000
--num-mel-bins=40
--num-ceps=40
--low-freq=40
--high-freq=-200
--allow-upsample=true
--allow-downsample=true

+ 10
- 0
src_voice_tip/vosk-model-small-cn-0.22/conf/model.conf View File

@@ -0,0 +1,10 @@
--min-active=200
--max-active=5000
--beam=12.0
--lattice-beam=4.0
--acoustic-scale=1.0
--frame-subsampling-factor=3
--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
--endpoint.rule2.min-trailing-silence=0.5
--endpoint.rule3.min-trailing-silence=1.0
--endpoint.rule4.min-trailing-silence=2.0

BIN
src_voice_tip/vosk-model-small-cn-0.22/graph/Gr.fst View File


BIN
src_voice_tip/vosk-model-small-cn-0.22/graph/HCLr.fst View File


+ 39
- 0
src_voice_tip/vosk-model-small-cn-0.22/graph/disambig_tid.int View File

@@ -0,0 +1,39 @@
11845
11846
11847
11848
11849
11850
11851
11852
11853
11854
11855
11856
11857
11858
11859
11860
11861
11862
11863
11864
11865
11866
11867
11868
11869
11870
11871
11872
11873
11874
11875
11876
11877
11878
11879
11880
11881
11882
11883

+ 646
- 0
src_voice_tip/vosk-model-small-cn-0.22/graph/phones/word_boundary.int View File

@@ -0,0 +1,646 @@
1 nonword
2 begin
3 end
4 internal
5 singleton
6 nonword
7 begin
8 end
9 internal
10 singleton
11 begin
12 end
13 internal
14 singleton
15 begin
16 end
17 internal
18 singleton
19 begin
20 end
21 internal
22 singleton
23 begin
24 end
25 internal
26 singleton
27 begin
28 end
29 internal
30 singleton
31 begin
32 end
33 internal
34 singleton
35 begin
36 end
37 internal
38 singleton
39 begin
40 end
41 internal
42 singleton
43 begin
44 end
45 internal
46 singleton
47 begin
48 end
49 internal
50 singleton
51 begin
52 end
53 internal
54 singleton
55 begin
56 end
57 internal
58 singleton
59 begin
60 end
61 internal
62 singleton
63 begin
64 end
65 internal
66 singleton
67 begin
68 end
69 internal
70 singleton
71 begin
72 end
73 internal
74 singleton
75 begin
76 end
77 internal
78 singleton
79 begin
80 end
81 internal
82 singleton
83 begin
84 end
85 internal
86 singleton
87 begin
88 end
89 internal
90 singleton
91 begin
92 end
93 internal
94 singleton
95 begin
96 end
97 internal
98 singleton
99 begin
100 end
101 internal
102 singleton
103 begin
104 end
105 internal
106 singleton
107 begin
108 end
109 internal
110 singleton
111 begin
112 end
113 internal
114 singleton
115 begin
116 end
117 internal
118 singleton
119 begin
120 end
121 internal
122 singleton
123 begin
124 end
125 internal
126 singleton
127 begin
128 end
129 internal
130 singleton
131 begin
132 end
133 internal
134 singleton
135 begin
136 end
137 internal
138 singleton
139 begin
140 end
141 internal
142 singleton
143 begin
144 end
145 internal
146 singleton
147 begin
148 end
149 internal
150 singleton
151 begin
152 end
153 internal
154 singleton
155 begin
156 end
157 internal
158 singleton
159 begin
160 end
161 internal
162 singleton
163 begin
164 end
165 internal
166 singleton
167 begin
168 end
169 internal
170 singleton
171 begin
172 end
173 internal
174 singleton
175 begin
176 end
177 internal
178 singleton
179 begin
180 end
181 internal
182 singleton
183 begin
184 end
185 internal
186 singleton
187 begin
188 end
189 internal
190 singleton
191 begin
192 end
193 internal
194 singleton
195 begin
196 end
197 internal
198 singleton
199 begin
200 end
201 internal
202 singleton
203 begin
204 end
205 internal
206 singleton
207 begin
208 end
209 internal
210 singleton
211 begin
212 end
213 internal
214 singleton
215 begin
216 end
217 internal
218 singleton
219 begin
220 end
221 internal
222 singleton
223 begin
224 end
225 internal
226 singleton
227 begin
228 end
229 internal
230 singleton
231 begin
232 end
233 internal
234 singleton
235 begin
236 end
237 internal
238 singleton
239 begin
240 end
241 internal
242 singleton
243 begin
244 end
245 internal
246 singleton
247 begin
248 end
249 internal
250 singleton
251 begin
252 end
253 internal
254 singleton
255 begin
256 end
257 internal
258 singleton
259 begin
260 end
261 internal
262 singleton
263 begin
264 end
265 internal
266 singleton
267 begin
268 end
269 internal
270 singleton
271 begin
272 end
273 internal
274 singleton
275 begin
276 end
277 internal
278 singleton
279 begin
280 end
281 internal
282 singleton
283 begin
284 end
285 internal
286 singleton
287 begin
288 end
289 internal
290 singleton
291 begin
292 end
293 internal
294 singleton
295 begin
296 end
297 internal
298 singleton
299 begin
300 end
301 internal
302 singleton
303 begin
304 end
305 internal
306 singleton
307 begin
308 end
309 internal
310 singleton
311 begin
312 end
313 internal
314 singleton
315 begin
316 end
317 internal
318 singleton
319 begin
320 end
321 internal
322 singleton
323 begin
324 end
325 internal
326 singleton
327 begin
328 end
329 internal
330 singleton
331 begin
332 end
333 internal
334 singleton
335 begin
336 end
337 internal
338 singleton
339 begin
340 end
341 internal
342 singleton
343 begin
344 end
345 internal
346 singleton
347 begin
348 end
349 internal
350 singleton
351 begin
352 end
353 internal
354 singleton
355 begin
356 end
357 internal
358 singleton
359 begin
360 end
361 internal
362 singleton
363 begin
364 end
365 internal
366 singleton
367 begin
368 end
369 internal
370 singleton
371 begin
372 end
373 internal
374 singleton
375 begin
376 end
377 internal
378 singleton
379 begin
380 end
381 internal
382 singleton
383 begin
384 end
385 internal
386 singleton
387 begin
388 end
389 internal
390 singleton
391 begin
392 end
393 internal
394 singleton
395 begin
396 end
397 internal
398 singleton
399 begin
400 end
401 internal
402 singleton
403 begin
404 end
405 internal
406 singleton
407 begin
408 end
409 internal
410 singleton
411 begin
412 end
413 internal
414 singleton
415 begin
416 end
417 internal
418 singleton
419 begin
420 end
421 internal
422 singleton
423 begin
424 end
425 internal
426 singleton
427 begin
428 end
429 internal
430 singleton
431 begin
432 end
433 internal
434 singleton
435 begin
436 end
437 internal
438 singleton
439 begin
440 end
441 internal
442 singleton
443 begin
444 end
445 internal
446 singleton
447 begin
448 end
449 internal
450 singleton
451 begin
452 end
453 internal
454 singleton
455 begin
456 end
457 internal
458 singleton
459 begin
460 end
461 internal
462 singleton
463 begin
464 end
465 internal
466 singleton
467 begin
468 end
469 internal
470 singleton
471 begin
472 end
473 internal
474 singleton
475 begin
476 end
477 internal
478 singleton
479 begin
480 end
481 internal
482 singleton
483 begin
484 end
485 internal
486 singleton
487 begin
488 end
489 internal
490 singleton
491 begin
492 end
493 internal
494 singleton
495 begin
496 end
497 internal
498 singleton
499 begin
500 end
501 internal
502 singleton
503 begin
504 end
505 internal
506 singleton
507 begin
508 end
509 internal
510 singleton
511 begin
512 end
513 internal
514 singleton
515 begin
516 end
517 internal
518 singleton
519 begin
520 end
521 internal
522 singleton
523 begin
524 end
525 internal
526 singleton
527 begin
528 end
529 internal
530 singleton
531 begin
532 end
533 internal
534 singleton
535 begin
536 end
537 internal
538 singleton
539 begin
540 end
541 internal
542 singleton
543 begin
544 end
545 internal
546 singleton
547 begin
548 end
549 internal
550 singleton
551 begin
552 end
553 internal
554 singleton
555 begin
556 end
557 internal
558 singleton
559 begin
560 end
561 internal
562 singleton
563 begin
564 end
565 internal
566 singleton
567 begin
568 end
569 internal
570 singleton
571 begin
572 end
573 internal
574 singleton
575 begin
576 end
577 internal
578 singleton
579 begin
580 end
581 internal
582 singleton
583 begin
584 end
585 internal
586 singleton
587 begin
588 end
589 internal
590 singleton
591 begin
592 end
593 internal
594 singleton
595 begin
596 end
597 internal
598 singleton
599 begin
600 end
601 internal
602 singleton
603 begin
604 end
605 internal
606 singleton
607 begin
608 end
609 internal
610 singleton
611 begin
612 end
613 internal
614 singleton
615 begin
616 end
617 internal
618 singleton
619 begin
620 end
621 internal
622 singleton
623 begin
624 end
625 internal
626 singleton
627 begin
628 end
629 internal
630 singleton
631 begin
632 end
633 internal
634 singleton
635 begin
636 end
637 internal
638 singleton
639 begin
640 end
641 internal
642 singleton
643 begin
644 end
645 internal
646 singleton

BIN
src_voice_tip/vosk-model-small-cn-0.22/ivector/final.dubm View File


BIN
src_voice_tip/vosk-model-small-cn-0.22/ivector/final.ie View File


BIN
src_voice_tip/vosk-model-small-cn-0.22/ivector/final.mat View File


+ 3
- 0
src_voice_tip/vosk-model-small-cn-0.22/ivector/global_cmvn.stats View File

@@ -0,0 +1,3 @@
[
1.117107e+11 -7.827721e+08 -1.101398e+10 -2.193934e+09 -1.347332e+10 -1.613916e+10 -1.199561e+10 -1.255081e+10 -1.638895e+10 -3.821099e+09 -1.372833e+10 -5.244242e+09 -1.098187e+10 -3.655235e+09 -9.364579e+09 -4.285302e+09 -6.296873e+09 -1.552953e+09 -3.176746e+09 -1.202976e+08 -9.857023e+08 2.316555e+08 -1.61059e+08 -5.891868e+07 3.465849e+08 -1.842054e+08 3.248211e+08 -1.483965e+08 3.739239e+08 -6.672061e+08 4.442288e+08 -9.274889e+08 5.142684e+08 4.292036e+07 2.206386e+08 -4.532715e+08 -2.092499e+08 -3.70488e+08 -8.079404e+07 -8.425977e+07 1.344125e+09
9.982632e+12 1.02635e+12 8.634624e+11 9.06451e+11 9.652096e+11 1.12772e+12 9.468372e+11 9.141218e+11 9.670484e+11 6.936961e+11 8.141006e+11 6.256321e+11 6.087707e+11 4.616898e+11 4.212042e+11 2.862872e+11 2.498089e+11 1.470856e+11 1.099197e+11 5.780894e+10 3.118114e+10 1.060667e+10 1.466199e+09 4.173056e+08 5.257362e+09 1.277714e+10 2.114478e+10 2.974502e+10 3.587691e+10 4.078971e+10 4.247745e+10 4.382608e+10 4.62521e+10 4.575282e+10 3.546206e+10 3.041531e+10 2.838562e+10 2.258604e+10 1.715295e+10 1.303227e+10 0 ]

+ 0
- 0
src_voice_tip/vosk-model-small-cn-0.22/ivector/online_cmvn.conf View File


+ 2
- 0
src_voice_tip/vosk-model-small-cn-0.22/ivector/splice.conf View File

@@ -0,0 +1,2 @@
--left-context=3
--right-context=3
