load_other_pro

finalv3
ydw 2 months ago
parent commit 213792c936
50 changed files with 1935 additions and 0 deletions
  1. +21   -0   src_screenshot/LICENSE
  2. BIN        src_screenshot/image/cropped_Right_1753179393.jpg
  3. BIN        src_screenshot/image/cropped_Right_1753179532.jpg
  4. BIN        src_screenshot/image/cropped_Right_1753179605.jpg
  5. +0    -0   src_screenshot/main.py
  6. +127  -0   src_screenshot/utils/GUI.py
  7. +0    -0   src_screenshot/utils/__init__.py
  8. BIN        src_screenshot/utils/__pycache__/finger_drawer.cpython-312.pyc
  9. BIN        src_screenshot/utils/__pycache__/finger_drawer.cpython-38.pyc
 10. BIN        src_screenshot/utils/__pycache__/gesture_data.cpython-312.pyc
 11. BIN        src_screenshot/utils/__pycache__/gesture_data.cpython-38.pyc
 12. BIN        src_screenshot/utils/__pycache__/hand_gesture.cpython-312.pyc
 13. BIN        src_screenshot/utils/__pycache__/hand_gesture.cpython-38.pyc
 14. BIN        src_screenshot/utils/__pycache__/index_finger.cpython-312.pyc
 15. BIN        src_screenshot/utils/__pycache__/index_finger.cpython-38.pyc
 16. BIN        src_screenshot/utils/__pycache__/kalman_filter.cpython-312.pyc
 17. BIN        src_screenshot/utils/__pycache__/kalman_filter.cpython-38.pyc
 18. BIN        src_screenshot/utils/__pycache__/model.cpython-312.pyc
 19. BIN        src_screenshot/utils/__pycache__/model.cpython-38.pyc
 20. BIN        src_screenshot/utils/__pycache__/process_images.cpython-312.pyc
 21. BIN        src_screenshot/utils/__pycache__/process_images.cpython-38.pyc
 22. BIN        src_screenshot/utils/__pycache__/video_recognition.cpython-312.pyc
 23. BIN        src_screenshot/utils/__pycache__/video_recognition.cpython-38.pyc
 24. +34   -0   src_screenshot/utils/finger_drawer.py
 25. +43   -0   src_screenshot/utils/gesture_data.py
 26. +24   -0   src_screenshot/utils/gesture_process.py
 27. +437  -0   src_screenshot/utils/gesture_recognition.ipynb
 28. +56   -0   src_screenshot/utils/hand_gesture.py
 29. +112  -0   src_screenshot/utils/index_finger.py
 30. +36   -0   src_screenshot/utils/kalman_filter.py
 31. +17   -0   src_screenshot/utils/model.py
 32. +24   -0   src_screenshot/utils/process_images.py
 33. +65   -0   src_screenshot/utils/video_recognition.py
 34. +2    -0   src_voice_tip/README.md
 35. +3    -0   src_voice_tip/requirements.txt
 36. +220  -0   src_voice_tip/src/voice.py
 37. +6    -0   src_voice_tip/vosk-model-small-cn-0.22/README
 38. BIN        src_voice_tip/vosk-model-small-cn-0.22/am/final.mdl
 39. +8    -0   src_voice_tip/vosk-model-small-cn-0.22/conf/mfcc.conf
 40. +10   -0   src_voice_tip/vosk-model-small-cn-0.22/conf/model.conf
 41. BIN        src_voice_tip/vosk-model-small-cn-0.22/graph/Gr.fst
 42. BIN        src_voice_tip/vosk-model-small-cn-0.22/graph/HCLr.fst
 43. +39   -0   src_voice_tip/vosk-model-small-cn-0.22/graph/disambig_tid.int
 44. +646  -0   src_voice_tip/vosk-model-small-cn-0.22/graph/phones/word_boundary.int
 45. BIN        src_voice_tip/vosk-model-small-cn-0.22/ivector/final.dubm
 46. BIN        src_voice_tip/vosk-model-small-cn-0.22/ivector/final.ie
 47. BIN        src_voice_tip/vosk-model-small-cn-0.22/ivector/final.mat
 48. +3    -0   src_voice_tip/vosk-model-small-cn-0.22/ivector/global_cmvn.stats
 49. +0    -0   src_voice_tip/vosk-model-small-cn-0.22/ivector/online_cmvn.conf
 50. +2    -0   src_voice_tip/vosk-model-small-cn-0.22/ivector/splice.conf

+ 21
- 0
src_screenshot/LICENSE View File

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 EzraZephyr

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

BIN
src_screenshot/image/cropped_Right_1753179393.jpg View File

Width: 275  |  Height: 254  |  Size: 26 KiB

BIN
src_screenshot/image/cropped_Right_1753179532.jpg View File

Width: 268  |  Height: 257  |  Size: 15 KiB

BIN
src_screenshot/image/cropped_Right_1753179605.jpg View File

Width: 585  |  Height: 384  |  Size: 59 KiB

+ 0
- 0
src_screenshot/main.py View File


+ 127
- 0
src_screenshot/utils/GUI.py View File

@@ -0,0 +1,127 @@
import cv2
import tkinter as tk
from tkinter import filedialog, messagebox
from video_recognition import upload_and_process_video, show_frame
from process_images import HandGestureProcessor

current_mode = None
current_cap = None
# Track the current mode and the camera resource

# Main logic for building the GUI
def create_gui():
    try:
        print("Creating the GUI")
        root = tk.Tk()
        root.title("Gesture Recognition")
        root.geometry("800x600")
        print("GUI window created")
        canvas = tk.Canvas(root, width=640, height=480)
        canvas.pack()
        print("Canvas created")
        camera_button = tk.Button(
            root,
            text="Use Camera for Real-time Recognition",
            command=lambda: switch_to_camera(canvas)
        )
        camera_button.pack(pady=10)
        print("Camera button created")
        video_button = tk.Button(
            root,
            text="Upload Video File for Processing",
            command=lambda: select_and_process_video(canvas, root)
        )
        video_button.pack(pady=10)
        print("Video upload button created")
        print("GUI built, entering main loop")
        root.mainloop()
    except Exception as e:
        print(f"[ERROR] Exception while creating the GUI: {str(e)}")
        import traceback
        print(traceback.format_exc())

# Switch to real-time camera recognition mode
def switch_to_camera(canvas):
    global current_mode, current_cap
    stop_current_operation()
    # Stop the current operation and release the camera
    current_mode = "camera"
    canvas.delete("all")
    # Set the mode to camera and clear the canvas
    current_cap = cv2.VideoCapture(0)
    if not current_cap.isOpened():
        messagebox.showerror("Error", "Cannot open camera")
        current_mode = None
        return
    # Start the camera
    start_camera(canvas, current_cap)
    # Pass in the canvas and current_cap

# Switch to video-file processing mode
def select_and_process_video(canvas, root):
    global current_mode, current_cap
    stop_current_operation()
    current_mode = "video"
    canvas.delete("all")
    video_path = filedialog.askopenfilename(
        title="Select a Video File",
        filetypes=(("MP4 files", "*.mp4"), ("AVI files", "*.avi"), ("All files", "*.*"))
    )
    # Choose a video file
    if video_path:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            messagebox.showerror("Error", "Cannot open video file")
            return
        # Read the video's width and height, then resize the canvas to match
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        cap.release()
        canvas.config(width=frame_width, height=frame_height)
        root.geometry(f"{frame_width + 160}x{frame_height + 200}")  # Resize the window
        error_message = upload_and_process_video(canvas, video_path)
        if error_message:
            messagebox.showerror("Error", error_message)
        # Upload and process the video file

def stop_current_operation():
    global current_cap
    if current_cap and current_cap.isOpened():
        current_cap.release()
        cv2.destroyAllWindows()
    current_cap = None
# Stop the current operation: release the camera and close all windows

def start_camera(canvas, cap):
    # Unlike video_recognition.start_camera, this variant reuses an
    # already-opened capture instead of opening its own device
    if not cap.isOpened():
        return "Cannot open camera"
    gesture_processor = HandGestureProcessor()
    show_frame(canvas, cap, gesture_processor)
# Start the camera for real-time gesture recognition

if __name__ == "__main__":
    create_gui()

+ 0
- 0
src_screenshot/utils/__init__.py View File


BIN
src_screenshot/utils/__pycache__/finger_drawer.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/finger_drawer.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/gesture_data.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/gesture_data.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/hand_gesture.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/hand_gesture.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/index_finger.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/index_finger.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/kalman_filter.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/kalman_filter.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/model.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/model.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/process_images.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/process_images.cpython-38.pyc View File


BIN
src_screenshot/utils/__pycache__/video_recognition.cpython-312.pyc View File


BIN
src_screenshot/utils/__pycache__/video_recognition.cpython-38.pyc View File


+ 34
- 0
src_screenshot/utils/finger_drawer.py View File

@@ -0,0 +1,34 @@
import cv2

class FingerDrawer:
    @staticmethod
    def draw_finger_points(image, hand_21, temp_handness, width, height):
        cz0 = hand_21.landmark[0].z
        index_finger_tip_str = ''
        for i in range(21):
            cx = int(hand_21.landmark[i].x * width)
            cy = int(hand_21.landmark[i].y * height)
            cz = hand_21.landmark[i].z
            depth_z = cz0 - cz
            radius = max(int(6 * (1 + depth_z * 5)), 0)
            # Scale the dot radius by depth relative to the wrist
            if i == 0:
                image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)
            elif i == 8:
                image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)
                index_finger_tip_str += f'{temp_handness}:{depth_z:.2f}, '
            elif i in [1, 5, 9, 13, 17]:
                image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)
            elif i in [2, 6, 10, 14, 18]:
                image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)
            elif i in [3, 7, 11, 15, 19]:
                image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)
            elif i in [4, 12, 16, 20]:
                image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)
            # Draw each joint group in its own color, sized by its depth from the wrist
        return image, index_finger_tip_str
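For intuition, a tiny sketch (not part of the commit) of the depth-scaled radius used above; depth_z is the wrist z minus the landmark z, so positive values mean the landmark sits closer to the camera than the wrist:

for depth_z in (-0.10, 0.0, 0.05, 0.10):
    radius = max(int(6 * (1 + depth_z * 5)), 0)
    print(f"depth_z={depth_z:+.2f} -> radius={radius}px")
# -0.10 -> 3px, 0.00 -> 6px, +0.05 -> 7px, +0.10 -> 9px:
# landmarks nearer the camera draw larger, giving a rough 3D cue.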

+ 43
- 0
src_screenshot/utils/gesture_data.py View File

@@ -0,0 +1,43 @@
from collections import deque

class HandState:
    def __init__(self):
        self.gesture_locked = {'Left': False, 'Right': False}
        self.gesture_start_time = {'Left': 0, 'Right': 0}
        self.buffer_start_time = {'Left': 0, 'Right': 0}
        self.start_drag_time = {'Left': 0, 'Right': 0}
        self.dragging = {'Left': False, 'Right': False}
        self.drag_point = {'Left': (0, 0), 'Right': (0, 0)}
        self.buffer_duration = {'Left': 0.25, 'Right': 0.25}
        self.is_index_finger_up = {'Left': False, 'Right': False}
        self.index_finger_second = {'Left': 0, 'Right': 0}
        self.index_finger_tip = {'Left': 0, 'Right': 0}
        self.trajectory = {'Left': [], 'Right': []}
        self.square_queue = deque()
        self.wait_time = 1.5
        self.kalman_wait_time = 0.5
        self.wait_box = 2
        self.rect_draw_time = {'Left': 0, 'Right': 0}
        self.last_drawn_box = {'Left': None, 'Right': None}

    def clear_hand_states(self, detected_hand='Both'):
        hands_to_clear = {'Left', 'Right'}
        if detected_hand != 'Both':
            hands_to_clear -= {detected_hand}
            # Keep the detected hand; reset only the other one
        for h in hands_to_clear:
            self.gesture_locked[h] = False
            self.gesture_start_time[h] = 0
            self.buffer_start_time[h] = 0
            self.dragging[h] = False
            self.drag_point[h] = (0, 0)
            self.buffer_duration[h] = 0.25
            self.is_index_finger_up[h] = False
            self.trajectory[h].clear()
            self.start_drag_time[h] = 0
            self.rect_draw_time[h] = 0
            self.last_drawn_box[h] = None
# Left- and right-hand state is stored separately; sharing entries would
# let the two hands' data conflict
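A minimal usage sketch (assuming the module is on the import path) of the slightly inverted contract of clear_hand_states: the argument names the hand that was detected, and every other hand is reset:

from gesture_data import HandState

state = HandState()
state.dragging['Left'] = True
state.dragging['Right'] = True
state.clear_hand_states(detected_hand='Right')  # only the right hand was seen this frame
print(state.dragging)  # {'Left': False, 'Right': True}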

+ 24
- 0
src_screenshot/utils/gesture_process.py View File

@@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler

class HandGestureProcessor:
    def __init__(self):
        self.hand_handler = HandGestureHandler()

    def process_image(self, image, is_video=False):
        start_time = time.time()
        height, width = image.shape[:2]
        image = cv2.flip(image, 1)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Read the frame size, mirror the frame, and convert the color space
        image = self.hand_handler.handle_hand_gestures(image, width, height, is_video)
        # handle_hand_gestures requires is_video, so it is threaded through here
        spend_time = time.time() - start_time
        FPS = 1.0 / spend_time if spend_time > 0 else 0
        image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
        # Compute and overlay the frame rate
        return image

+ 437
- 0
src_screenshot/utils/gesture_recognition.ipynb View File

@@ -0,0 +1,437 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.761076Z",
"start_time": "2024-09-07T05:11:22.404354Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"import cv2\n",
"import time\n",
"import mediapipe\n",
"import numpy as np\n",
"from collections import deque\n",
"from filterpy.kalman import KalmanFilter"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "40aada17ccd31fe",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.777139Z",
"start_time": "2024-09-07T05:11:28.761076Z"
}
},
"outputs": [],
"source": [
"gesture_locked = {'Left':False,'Right':False}\n",
"gesture_start_time = {'Left':0,'Right':0}\n",
"buffer_start_time = {'Left':0,'Right':0}\n",
"start_drag_time = {'Left':0,'Right':0}\n",
"dragging = {'Left':False,'Right':False}\n",
"drag_point = {'Left':(0, 0),'Right':(0, 0)}\n",
"buffer_duration = {'Left':0.25,'Right':0.25}\n",
"is_index_finger_up = {'Left':False,'Right':False}\n",
"index_finger_second = {'Left':0,'Right':0}\n",
"index_finger_tip = {'Left':0,'Right':0}\n",
"trajectory = {'Left':[],'Right':[]}\n",
"square_queue = deque()\n",
"wait_time = 1.5\n",
"kalman_wait_time = 0.5\n",
"wait_box = 2\n",
"rect_draw_time = {'Left':0,'Right':0}\n",
"last_drawn_box = {'Left':None,'Right':None}\n",
"elapsed_time = {'Left':0,'Right':0}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2ee9323bb1c25cc0",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.824573Z",
"start_time": "2024-09-07T05:11:28.777139Z"
}
},
"outputs": [],
"source": [
"def clear_hand_states(detected_hand ='Both'):\n",
" global gesture_locked, gesture_start_time, buffer_start_time, dragging, drag_point, buffer_duration,is_index_finger_up, trajectory,wait_time,kalman_wait_time, start_drag_time, rect_draw_time, last_drawn_box, wait_box, elapsed_time\n",
" \n",
" hands_to_clear = {'Left', 'Right'}\n",
" if detected_hand == 'Both':\n",
" hands_to_clear = hands_to_clear\n",
" else:\n",
" hands_to_clear -= {detected_hand}\n",
" # 反向判断左右手\n",
"\n",
" for h in hands_to_clear:\n",
" gesture_locked[h] = False\n",
" gesture_start_time[h] = 0\n",
" buffer_start_time[h] = 0\n",
" dragging[h] = False\n",
" drag_point[h] = (0, 0)\n",
" buffer_duration[h] = 0.25\n",
" is_index_finger_up[h] = False\n",
" trajectory[h].clear()\n",
" start_drag_time[h] = 0\n",
" rect_draw_time[h] = 0\n",
" last_drawn_box[h] = None\n",
" elapsed_time[h] = 0\n",
" # 清空没被检测的手"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "96cf431d2562e7d",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.855831Z",
"start_time": "2024-09-07T05:11:28.824573Z"
}
},
"outputs": [],
"source": [
"kalman_filters = {\n",
" 'Left': KalmanFilter(dim_x=4, dim_z=2),\n",
" 'Right': KalmanFilter(dim_x=4, dim_z=2)\n",
"}\n",
"\n",
"for key in kalman_filters:\n",
" kalman_filters[key].x = np.array([0., 0., 0., 0.])\n",
" kalman_filters[key].F = np.array([[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]])\n",
" # 状态转移矩阵\n",
" kalman_filters[key].H = np.array([[1, 0, 0, 0], [0, 1, 0, 0]])\n",
" # 观测矩阵\n",
" kalman_filters[key].P *= 1000.\n",
" kalman_filters[key].R = 3\n",
" kalman_filters[key].Q = np.eye(4) * 0.01\n",
"\n",
"def kalman_filter_point(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.predict()\n",
" kf.update([x, y])\n",
" # 更新状态\n",
" return (kf.x[0], kf.x[1])\n",
"\n",
"def reset_kalman_filter(hand_label, x, y):\n",
" kf = kalman_filters[hand_label]\n",
" kf.x = np.array([x, y, 0., 0.])\n",
" kf.P *= 1000.\n",
" # 重置"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "edc274b7ed495122",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.887346Z",
"start_time": "2024-09-07T05:11:28.855831Z"
}
},
"outputs": [],
"source": [
"\n",
"mp_hands = mediapipe.solutions.hands\n",
"\n",
"hands = mp_hands.Hands(\n",
" static_image_mode=False,\n",
" max_num_hands=2,\n",
" # 一只更稳定\n",
" min_detection_confidence=0.5,\n",
" min_tracking_confidence=0.5\n",
")\n",
"\n",
"mp_drawing = mediapipe.solutions.drawing_utils\n",
"clear_hand_states()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51ff809ecaf1f899",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:11:28.934274Z",
"start_time": "2024-09-07T05:11:28.887346Z"
}
},
"outputs": [],
"source": [
"def process_image(image):\n",
"\n",
" start_time = time.time()\n",
" height, width = image.shape[:2]\n",
" image = cv2.flip(image, 1)\n",
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
" # 预处理帧\n",
" \n",
" results = hands.process(image)\n",
" \n",
" if results.multi_hand_landmarks:\n",
" # 如果检测到手\n",
" \n",
" handness_str = ''\n",
" index_finger_tip_str = ''\n",
" \n",
" if len(results.multi_hand_landmarks) == 1:\n",
" clear_hand_states(detected_hand = results.multi_handedness[0].classification[0].label)\n",
" # 如果只有一只手 则清空另一只手的数据 避免后续冲突导致不稳定\n",
" \n",
" for hand_idx in range(len(results.multi_hand_landmarks)):\n",
" \n",
" hand_21 = results.multi_hand_landmarks[hand_idx]\n",
" mp_drawing.draw_landmarks(image, hand_21, mp_hands.HAND_CONNECTIONS)\n",
" \n",
" temp_handness = results.multi_handedness[hand_idx].classification[0].label\n",
" handness_str += '{}:{}, '.format(hand_idx, temp_handness)\n",
" is_index_finger_up[temp_handness] = False\n",
" # 先设置为false 防止放下被错误更新为竖起\n",
" \n",
" cz0 = hand_21.landmark[0].z\n",
" index_finger_second[temp_handness] = hand_21.landmark[7]\n",
" index_finger_tip[temp_handness] = hand_21.landmark[8]\n",
" # 食指指尖和第一个关节\n",
" \n",
" index_x, index_y = int(index_finger_tip[temp_handness].x * width), int(index_finger_tip[temp_handness].y * height)\n",
"\n",
" if all(index_finger_second[temp_handness].y < hand_21.landmark[i].y for i in range(21) if i not in [7, 8]) and index_finger_tip[temp_handness].y < index_finger_second[temp_handness].y:\n",
" is_index_finger_up[temp_handness] = True\n",
" # 如果指尖和第二个关节高度大于整只手所有关节点 则视为执行“指向”操作 \n",
"\n",
" if is_index_finger_up[temp_handness]:\n",
" if not gesture_locked[temp_handness]:\n",
" if gesture_start_time[temp_handness] == 0:\n",
" gesture_start_time[temp_handness] = time.time()\n",
" # 记录食指抬起的时间\n",
" elif time.time() - gesture_start_time[temp_handness] > wait_time:\n",
" dragging[temp_handness] = True\n",
" gesture_locked[temp_handness] = True\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" # 如果食指抬起的时间大于预设的等待时间则视为执行“指向”操作\n",
" buffer_start_time[temp_handness] = 0\n",
" # 检测到食指竖起就刷新缓冲时间\n",
" else:\n",
" if buffer_start_time[temp_handness] == 0:\n",
" buffer_start_time[temp_handness] = time.time()\n",
" elif time.time() - buffer_start_time[temp_handness] > buffer_duration[temp_handness]:\n",
" gesture_start_time[temp_handness] = 0\n",
" gesture_locked[temp_handness] = False\n",
" dragging[temp_handness] = False\n",
" # 如果缓冲时间大于设定 就证明已经结束指向操作\n",
" # 这样可以防止某一帧识别有误导致指向操作被错误清除\n",
" \n",
" if dragging[temp_handness]:\n",
"\n",
" if start_drag_time[temp_handness] == 0:\n",
" start_drag_time[temp_handness] = time.time()\n",
" reset_kalman_filter(temp_handness, index_x, index_y)\n",
" # 每次画线的时候初始化滤波器\n",
" \n",
" smooth_x, smooth_y = kalman_filter_point(temp_handness, index_x, index_y)\n",
" drag_point[temp_handness] = (index_x, index_y)\n",
" index_finger_radius = max(int(10 * (1 + (cz0 - index_finger_tip[temp_handness].z) * 5)), 0)\n",
" cv2.circle(image, drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)\n",
" # 根据离掌根的深度距离来构建一个圆\n",
" # 用来显示已经开始指向操作\n",
" # 和下方构建的深度点位对应 直接用倍数\n",
" drag_point_smooth = (smooth_x, smooth_y)\n",
" \n",
" if time.time() - start_drag_time[temp_handness] > kalman_wait_time:\n",
" trajectory[temp_handness].append(drag_point_smooth)\n",
" # 因为kalman滤波器初始化的时候会很不稳定 前几帧通常会有较为严重的噪声\n",
" # 所以直接等待前几帧运行完成之后再将点位加到轨迹列表中\n",
" else:\n",
" if len(trajectory[temp_handness]) > 4:\n",
" contour = np.array(trajectory[temp_handness], dtype=np.int32)\n",
" rect = cv2.minAreaRect(contour)\n",
" box = cv2.boxPoints(rect)\n",
" box = np.int64(box)\n",
" rect_draw_time[temp_handness] = time.time()\n",
" last_drawn_box[temp_handness] = box\n",
" # 如果指向操作结束 轨迹列表有至少四个点的时候\n",
" # 使用最小包围图形将画的不规则图案调整为一个矩形\n",
"\n",
" start_drag_time[temp_handness] = 0\n",
" trajectory[temp_handness].clear()\n",
"\n",
" for i in range(1, len(trajectory[temp_handness])):\n",
"\n",
" pt1 = (int(trajectory[temp_handness][i-1][0]), int(trajectory[temp_handness][i-1][1]))\n",
" pt2 = (int(trajectory[temp_handness][i][0]), int(trajectory[temp_handness][i][1]))\n",
" cv2.line(image, pt1, pt2, (0, 0, 255), 2)\n",
" # 绘制连接轨迹点的线\n",
"\n",
" if last_drawn_box[temp_handness] is not None:\n",
" elapsed_time[temp_handness] = time.time() - rect_draw_time[temp_handness]\n",
" \n",
" if elapsed_time[temp_handness] < wait_box:\n",
" cv2.drawContours(image, [last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)\n",
" # 将矩形框保留一段时间 否则一帧太快 无法看清效果\n",
" \n",
" elif elapsed_time[temp_handness] >= wait_box - 0.1:\n",
" \n",
" box = last_drawn_box[temp_handness]\n",
" x_min = max(0, min(box[:, 0]))\n",
" y_min = max(0, min(box[:, 1]))\n",
" x_max = min(image.shape[1], max(box[:, 0]))\n",
" y_max = min(image.shape[0], max(box[:, 1]))\n",
" cropped_image = image[y_min:y_max, x_min:x_max]\n",
" filename = f\"../image/cropped_{temp_handness}_{int(time.time())}.jpg\"\n",
" cv2.imwrite(filename, cropped_image)\n",
" last_drawn_box[temp_handness] = None\n",
" # 不能直接剪裁画完的图像 可能会错误的将手剪裁进去\n",
" # 等待一段时间 有一个给手缓冲移动走的时间再将这一帧里的矩形提取出来\n",
" \n",
" for i in range(21):\n",
" \n",
" cx = int(hand_21.landmark[i].x * width)\n",
" cy = int(hand_21.landmark[i].y * height)\n",
" cz = hand_21.landmark[i].z\n",
" depth_z = cz0 - cz\n",
" radius = max(int(6 * (1 + depth_z*5)), 0)\n",
" \n",
" if i == 0:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 255, 0), thickness=-1)\n",
" if i == 8:\n",
" image = cv2.circle(image, (cx, cy), radius, (255, 165, 0), thickness=-1)\n",
" index_finger_tip_str += '{}:{:.2f}, '.format(hand_idx, depth_z)\n",
" if i in [1,5,9,13,17]: \n",
" image = cv2.circle(image, (cx, cy), radius, (0, 0, 255), thickness=-1)\n",
" if i in [2,6,10,14,18]:\n",
" image = cv2.circle(image, (cx, cy), radius, (75, 0, 130), thickness=-1)\n",
" if i in [3,7,11,15,19]:\n",
" image = cv2.circle(image, (cx, cy), radius, (238, 130, 238), thickness=-1)\n",
" if i in [4,12,16,20]:\n",
" image = cv2.circle(image, (cx, cy), radius, (0, 255, 255), thickness=-1)\n",
" # 提取出每一个关节点 赋予对应的颜色和根据掌根的深度\n",
" \n",
" scaler= 1\n",
" image = cv2.putText(image,handness_str, (25*scaler, 100*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)\n",
" image = cv2.putText(image,index_finger_tip_str, (25*scaler, 150*scaler), cv2.FONT_HERSHEY_SIMPLEX, 1.25*scaler, (0,0,255), 2,)\n",
"\n",
" spend_time = time.time() - start_time\n",
" if spend_time > 0:\n",
" FPS = 1.0 / spend_time\n",
" else:\n",
" FPS = 0\n",
" \n",
" image = cv2.putText(image,'FPS '+str(int(FPS)),(25*scaler,50*scaler),cv2.FONT_HERSHEY_SIMPLEX,1.25*scaler,(0,0,255),2,)\n",
" # 显示FPS 检测到的手和食指指尖对于掌根的深度值\n",
" \n",
" else:\n",
" clear_hand_states()\n",
" # 如果没检测到手就清空全部信息\n",
" \n",
" return image"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7ce23e80ed36041",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-07T05:19:32.248575Z",
"start_time": "2024-09-07T05:11:28.934663Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\25055\\AppData\\Local\\Temp\\ipykernel_4200\\752492595.py:89: DeprecationWarning: `np.int0` is a deprecated alias for `np.intp`. (Deprecated NumPy 1.24)\n",
" box = np.int0(box)\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[7], line 10\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCamera Error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 8\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m---> 10\u001b[0m frame \u001b[38;5;241m=\u001b[39m \u001b[43mprocess_image\u001b[49m\u001b[43m(\u001b[49m\u001b[43mframe\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 11\u001b[0m cv2\u001b[38;5;241m.\u001b[39mimshow(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVideo\u001b[39m\u001b[38;5;124m'\u001b[39m, frame)\n\u001b[0;32m 13\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cv2\u001b[38;5;241m.\u001b[39mwaitKey(\u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m&\u001b[39m \u001b[38;5;241m0xFF\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mord\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mq\u001b[39m\u001b[38;5;124m'\u001b[39m):\n",
"Cell \u001b[1;32mIn[6], line 9\u001b[0m, in \u001b[0;36mprocess_image\u001b[1;34m(image)\u001b[0m\n\u001b[0;32m 6\u001b[0m image \u001b[38;5;241m=\u001b[39m cv2\u001b[38;5;241m.\u001b[39mcvtColor(image, cv2\u001b[38;5;241m.\u001b[39mCOLOR_BGR2RGB)\n\u001b[0;32m 7\u001b[0m \u001b[38;5;66;03m# 预处理帧\u001b[39;00m\n\u001b[1;32m----> 9\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mhands\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m results\u001b[38;5;241m.\u001b[39mmulti_hand_landmarks:\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# 如果检测到手\u001b[39;00m\n\u001b[0;32m 14\u001b[0m handness_str \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\n",
"File \u001b[1;32md:\\app-install-dict\\Anaconda3\\envs\\software_engineering\\lib\\site-packages\\mediapipe\\python\\solutions\\hands.py:153\u001b[0m, in \u001b[0;36mHands.process\u001b[1;34m(self, image)\u001b[0m\n\u001b[0;32m 132\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mprocess\u001b[39m(\u001b[38;5;28mself\u001b[39m, image: np\u001b[38;5;241m.\u001b[39mndarray) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NamedTuple:\n\u001b[0;32m 133\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Processes an RGB image and returns the hand landmarks and handedness of each detected hand.\u001b[39;00m\n\u001b[0;32m 134\u001b[0m \n\u001b[0;32m 135\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;124;03m right hand) of the detected hand.\u001b[39;00m\n\u001b[0;32m 151\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 153\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mimage\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mimage\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32md:\\app-install-dict\\Anaconda3\\envs\\software_engineering\\lib\\site-packages\\mediapipe\\python\\solution_base.py:335\u001b[0m, in \u001b[0;36mSolutionBase.process\u001b[1;34m(self, input_data)\u001b[0m\n\u001b[0;32m 329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 330\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_graph\u001b[38;5;241m.\u001b[39madd_packet_to_input_stream(\n\u001b[0;32m 331\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream_name,\n\u001b[0;32m 332\u001b[0m packet\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_packet(input_stream_type,\n\u001b[0;32m 333\u001b[0m data)\u001b[38;5;241m.\u001b[39mat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_simulated_timestamp))\n\u001b[1;32m--> 335\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_graph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait_until_idle\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 336\u001b[0m \u001b[38;5;66;03m# Create a NamedTuple object where the field names are mapping to the graph\u001b[39;00m\n\u001b[0;32m 337\u001b[0m \u001b[38;5;66;03m# output stream names.\u001b[39;00m\n\u001b[0;32m 338\u001b[0m solution_outputs \u001b[38;5;241m=\u001b[39m collections\u001b[38;5;241m.\u001b[39mnamedtuple(\n\u001b[0;32m 339\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSolutionOutputs\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_output_stream_type_info\u001b[38;5;241m.\u001b[39mkeys())\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m在当前单元格或上一个单元格中执行代码时 Kernel 崩溃。\n",
"\u001b[1;31m请查看单元格中的代码,以确定故障的可能原因。\n",
"\u001b[1;31m单击<a href='https://aka.ms/vscodeJupyterKernelCrash'>此处</a>了解详细信息。\n",
"\u001b[1;31m有关更多详细信息,请查看 Jupyter <a href='command:jupyter.viewOutput'>log</a>。"
]
}
],
"source": [
"cap = cv2.VideoCapture(1)\n",
"cap.open(0)\n",
"\n",
"while cap.isOpened():\n",
" success, frame = cap.read()\n",
" if not success:\n",
" print(\"Camera Error\")\n",
" break\n",
" \n",
" frame = process_image(frame)\n",
" cv2.imshow('Video', frame)\n",
" \n",
" if cv2.waitKey(1) & 0xFF == ord('q'):\n",
" break\n",
" \n",
"cap.release()\n",
"cv2.destroyAllWindows() "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10fca4bc34a944ea",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "software_engineering",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.20"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

+ 56
- 0
src_screenshot/utils/hand_gesture.py View File

@@ -0,0 +1,56 @@
import cv2
from model import HandTracker
from index_finger import IndexFingerHandler
from gesture_data import HandState
from kalman_filter import KalmanHandler
from finger_drawer import FingerDrawer

class HandGestureHandler:
    def __init__(self):
        self.hand_state = HandState()
        self.kalman_handler = KalmanHandler()
        self.hand_tracker = HandTracker()
        self.index_handler = IndexFingerHandler(self.hand_state, self.kalman_handler)

    def handle_hand_gestures(self, image, width, height, is_video):
        results = self.hand_tracker.process(image)
        if results.multi_hand_landmarks:
            handness_str = ''
            index_finger_tip_str = ''
            if len(results.multi_hand_landmarks) == 1:
                detected_hand = results.multi_handedness[0].classification[0].label
                self.hand_state.clear_hand_states(detected_hand)
                # With only one hand detected, clear the other hand's state so its
                # stale data cannot conflict when a second hand appears
            for hand_idx, hand_21 in enumerate(results.multi_hand_landmarks):
                self.hand_tracker.mp_drawing.draw_landmarks(
                    image, hand_21, self.hand_tracker.mp_hands.HAND_CONNECTIONS
                )
                # Draw the hand-landmark connections
                temp_handness = results.multi_handedness[hand_idx].classification[0].label
                handness_str += f'{hand_idx}:{temp_handness}, '
                self.hand_state.is_index_finger_up[temp_handness] = False
                image = self.index_handler.handle_index_finger(
                    image, hand_21, temp_handness, width, height
                )
                # Handle the index-finger gesture
                image, index_finger_tip_str = FingerDrawer.draw_finger_points(image, hand_21, temp_handness, width, height)
            if is_video:
                image = cv2.flip(image, 1)
            image = cv2.putText(image, handness_str, (25, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
            image = cv2.putText(image, index_finger_tip_str, (25, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
        else:
            if is_video:
                image = cv2.flip(image, 1)
            # Video from a rear camera is flipped before processing so the left/right
            # handedness labels come out correctly; after processing it is flipped
            # back so the final output is not mirrored.
            self.hand_state.clear_hand_states()
            # Clear all hand state when no hand is detected
        return image

+ 112
- 0
src_screenshot/utils/index_finger.py View File

@@ -0,0 +1,112 @@
import cv2
import time
import numpy as np

class IndexFingerHandler:
    def __init__(self, hand_state, kalman_handler):
        self.hand_state = hand_state
        self.kalman_handler = kalman_handler
        self.wait_time = 1.5
        self.kalman_wait_time = 0.5
        self.wait_box = 2

    def handle_index_finger(self, image, hand_21, temp_handness, width, height):
        cz0 = hand_21.landmark[0].z
        self.hand_state.index_finger_second[temp_handness] = hand_21.landmark[7]
        self.hand_state.index_finger_tip[temp_handness] = hand_21.landmark[8]
        index_x = int(self.hand_state.index_finger_tip[temp_handness].x * width)
        index_y = int(self.hand_state.index_finger_tip[temp_handness].y * height)
        self.update_index_finger_state(hand_21, temp_handness, index_x, index_y)
        self.draw_index_finger_gesture(image, temp_handness, index_x, index_y, cz0)
        return image
    # Track the index finger's state, render the gesture effects, and return the image

    def update_index_finger_state(self, hand_21, temp_handness, index_x, index_y):
        if all(self.hand_state.index_finger_second[temp_handness].y < hand_21.landmark[i].y
               for i in range(21) if i not in [7, 8]) and \
                self.hand_state.index_finger_tip[temp_handness].y < self.hand_state.index_finger_second[temp_handness].y:
            self.hand_state.is_index_finger_up[temp_handness] = True
        # If the fingertip and the joint below it sit above every other landmark,
        # the index finger counts as raised
        if self.hand_state.is_index_finger_up[temp_handness]:
            if not self.hand_state.gesture_locked[temp_handness]:
                if self.hand_state.gesture_start_time[temp_handness] == 0:
                    self.hand_state.gesture_start_time[temp_handness] = time.time()
                elif time.time() - self.hand_state.gesture_start_time[temp_handness] > self.wait_time:
                    self.hand_state.dragging[temp_handness] = True
                    self.hand_state.gesture_locked[temp_handness] = True
                    self.hand_state.drag_point[temp_handness] = (index_x, index_y)
                    # Once the finger has stayed up longer than wait_time, lock in the pointing gesture
            self.hand_state.buffer_start_time[temp_handness] = 0
            # Refresh the buffer timer whenever the raised finger is seen; it keeps a
            # brief misdetection from ending the gesture
        else:
            if self.hand_state.buffer_start_time[temp_handness] == 0:
                self.hand_state.buffer_start_time[temp_handness] = time.time()
            elif time.time() - self.hand_state.buffer_start_time[temp_handness] > self.hand_state.buffer_duration[temp_handness]:
                self.hand_state.gesture_start_time[temp_handness] = 0
                self.hand_state.gesture_locked[temp_handness] = False
                self.hand_state.dragging[temp_handness] = False
                # End the pointing gesture only after the interruption outlasts the buffer window

    def draw_index_finger_gesture(self, image, temp_handness, index_x, index_y, cz0):
        if self.hand_state.dragging[temp_handness]:
            if self.hand_state.start_drag_time[temp_handness] == 0:
                self.hand_state.start_drag_time[temp_handness] = time.time()
                self.kalman_handler.reset_kalman_filter(temp_handness, index_x, index_y)
                # On the first frame of a stroke, record the time and reset the Kalman filter
            smooth_x, smooth_y = self.kalman_handler.kalman_filter_point(temp_handness, index_x, index_y)
            # Smooth the trajectory with the Kalman filter to cut noise and jitter
            self.hand_state.drag_point[temp_handness] = (index_x, index_y)
            index_finger_radius = max(int(10 * (1 + (cz0 - self.hand_state.index_finger_tip[temp_handness].z) * 5)), 0)
            cv2.circle(image, self.hand_state.drag_point[temp_handness], index_finger_radius, (0, 0, 255), -1)
            # Size the circle by distance from the wrist, slightly larger than the
            # FingerDrawer dots so the locked pointing state is easy to see
            drag_point_smooth = (smooth_x, smooth_y)
            if time.time() - self.hand_state.start_drag_time[temp_handness] > self.kalman_wait_time:
                self.hand_state.trajectory[temp_handness].append(drag_point_smooth)
                # The filter needs a moment to stabilize, so points are only
                # appended to the trajectory after kalman_wait_time has passed
        else:
            if len(self.hand_state.trajectory[temp_handness]) > 4:
                contour = np.array(self.hand_state.trajectory[temp_handness], dtype=np.int32)
                rect = cv2.minAreaRect(contour)
                box = cv2.boxPoints(rect)
                box = np.int64(box)
                # With more than four trajectory points, compute the minimum-area bounding rectangle
                self.hand_state.rect_draw_time[temp_handness] = time.time()
                self.hand_state.last_drawn_box[temp_handness] = box
            self.hand_state.start_drag_time[temp_handness] = 0
            self.hand_state.trajectory[temp_handness].clear()
            # Reset the stroke timer and clear the trajectory
        for i in range(1, len(self.hand_state.trajectory[temp_handness])):
            pt1 = (int(self.hand_state.trajectory[temp_handness][i-1][0]), int(self.hand_state.trajectory[temp_handness][i-1][1]))
            pt2 = (int(self.hand_state.trajectory[temp_handness][i][0]), int(self.hand_state.trajectory[temp_handness][i][1]))
            cv2.line(image, pt1, pt2, (0, 0, 255), 2)
            # Draw the drag path
        if self.hand_state.last_drawn_box[temp_handness] is not None:
            elapsed_time = time.time() - self.hand_state.rect_draw_time[temp_handness]
            if elapsed_time < self.wait_box:
                cv2.drawContours(image, [self.hand_state.last_drawn_box[temp_handness]], 0, (0, 255, 0), 2)
                # Keep the bounding box visible for a while so it can actually be seen
            elif elapsed_time >= self.wait_box - 0.1:
                box = self.hand_state.last_drawn_box[temp_handness]
                x_min = max(0, min(box[:, 0]))
                y_min = max(0, min(box[:, 1]))
                x_max = min(image.shape[1], max(box[:, 0]))
                y_max = min(image.shape[0], max(box[:, 1]))
                cropped_image = image[y_min:y_max, x_min:x_max]
                filename = f"../image/cropped_{temp_handness}_{int(time.time())}.jpg"
                cv2.imwrite(filename, cropped_image)
                self.hand_state.last_drawn_box[temp_handness] = None
                # Cropping right after the stroke could catch the hand inside the box,
                # so the crop runs only as the box expires, giving the hand time to move away
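A standalone sketch (not from the commit) of the screenshot step at the end of draw_index_finger_gesture: an irregular fingertip trajectory is reduced to its minimum-area rectangle, clamped to the frame, and cropped axis-aligned:

import numpy as np
import cv2

image = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in frame
trajectory = [(100, 120), (220, 90), (260, 210), (130, 240), (90, 180)]

contour = np.array(trajectory, dtype=np.int32)
rect = cv2.minAreaRect(contour)          # (center, (w, h), angle)
box = np.int64(cv2.boxPoints(rect))      # the 4 rotated corner points

x_min, y_min = max(0, box[:, 0].min()), max(0, box[:, 1].min())
x_max = min(image.shape[1], box[:, 0].max())
y_max = min(image.shape[0], box[:, 1].max())
cropped = image[y_min:y_max, x_min:x_max]
print(cropped.shape)  # height x width of the screenshot region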

+ 36
- 0
src_screenshot/utils/kalman_filter.py View File

@@ -0,0 +1,36 @@
import numpy as np
from filterpy.kalman import KalmanFilter

class KalmanHandler:
    def __init__(self):
        self.kalman_filters = {
            'Left': KalmanFilter(dim_x=4, dim_z=2),
            'Right': KalmanFilter(dim_x=4, dim_z=2)
        }
        for key in self.kalman_filters:
            self.kalman_filters[key].x = np.array([0., 0., 0., 0.])
            self.kalman_filters[key].F = np.array([[1, 0, 1, 0],
                                                   [0, 1, 0, 1],
                                                   [0, 0, 1, 0],
                                                   [0, 0, 0, 1]])  # state-transition matrix
            self.kalman_filters[key].H = np.array([[1, 0, 0, 0],
                                                   [0, 1, 0, 0]])  # measurement matrix
            self.kalman_filters[key].P *= 1000.
            self.kalman_filters[key].R = 3
            self.kalman_filters[key].Q = np.eye(4) * 0.01
            # These parameters were tuned through repeated testing and behave stably

    def kalman_filter_point(self, hand_label, x, y):
        kf = self.kalman_filters[hand_label]
        kf.predict()
        kf.update([x, y])
        # Predict, then update with the new measurement
        return (kf.x[0], kf.x[1])

    def reset_kalman_filter(self, hand_label, x, y):
        kf = self.kalman_filters[hand_label]
        kf.x = np.array([x, y, 0., 0.])
        kf.P *= 1000.
        # Reset the state to the new starting point
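A quick smoothing demo (an illustrative sketch, assuming kalman_filter.py is on the import path): feed a jittery fingertip track through KalmanHandler and compare the raw points against the filtered output:

import random
from kalman_filter import KalmanHandler

handler = KalmanHandler()
handler.reset_kalman_filter('Right', 100, 100)  # start at the first detection

for t in range(10):
    # Simulated fingertip moving right with +/-5 px of jitter
    raw_x = 100 + 10 * t + random.uniform(-5, 5)
    raw_y = 100 + random.uniform(-5, 5)
    smooth_x, smooth_y = handler.kalman_filter_point('Right', raw_x, raw_y)
    print(f"raw=({raw_x:6.1f}, {raw_y:6.1f})  smooth=({smooth_x:6.1f}, {smooth_y:6.1f})")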

+ 17
- 0
src_screenshot/utils/model.py View File

@@ -0,0 +1,17 @@
import mediapipe as mp

class HandTracker:
    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            # A single hand tracks more stably
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
        self.mp_drawing = mp.solutions.drawing_utils

    def process(self, image):
        results = self.hands.process(image)
        return results
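A minimal single-frame sketch (assuming model.py is importable; the image file is hypothetical and not in the repo) showing how HandTracker.process output is consumed:

import cv2
from model import HandTracker

tracker = HandTracker()
bgr = cv2.imread("hand.jpg")  # hypothetical test photo
if bgr is None:
    raise SystemExit("place a test photo named hand.jpg next to this script")
results = tracker.process(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))  # mediapipe expects RGB
if results.multi_hand_landmarks:
    wrist = results.multi_hand_landmarks[0].landmark[0]
    print(f"wrist at ({wrist.x:.2f}, {wrist.y:.2f}) in normalized image coordinates")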

+ 24
- 0
src_screenshot/utils/process_images.py View File

@@ -0,0 +1,24 @@
import cv2
import time
from hand_gesture import HandGestureHandler

class HandGestureProcessor:
    def __init__(self):
        self.hand_handler = HandGestureHandler()

    def process_image(self, image, is_video):
        start_time = time.time()
        height, width = image.shape[:2]
        image = cv2.flip(image, 1)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Preprocess the incoming frame
        image = self.hand_handler.handle_hand_gestures(image, width, height, is_video)
        spend_time = time.time() - start_time
        FPS = 1.0 / spend_time if spend_time > 0 else 0
        image = cv2.putText(image, f'FPS {int(FPS)}', (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 0, 255), 2)
        # Compute and overlay the frame rate
        return image

+ 65
- 0
src_screenshot/utils/video_recognition.py View File

@@ -0,0 +1,65 @@
import cv2
from process_images import HandGestureProcessor
from tkinter import messagebox
from PIL import Image, ImageTk

def start_camera(canvas):
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        return "Cannot open camera"
    gesture_processor = HandGestureProcessor()
    show_frame(canvas, cap, gesture_processor)

def show_frame(canvas, cap, gesture_processor):
    success, frame = cap.read()
    if success:
        processed_frame = gesture_processor.process_image(frame, False)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        # Process the frame, convert it to RGB, and draw it on the canvas
        canvas.after(10, show_frame, canvas, cap, gesture_processor)
        # Reschedule itself so every subsequent frame is processed and shown
    else:
        cap.release()
        cv2.destroyAllWindows()

def upload_and_process_video(canvas, video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Cannot open video file"
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    # Read the video's parameters
    output_filename = "../video/processed_output.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # 'mp4v' matches the .mp4 container ('XVID' is conventionally an AVI codec)
    out = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))
    # Set the output path and encoding
    gesture_processor = HandGestureProcessor()
    process_video_frame(canvas, cap, gesture_processor, out)

def process_video_frame(canvas, cap, gesture_processor, out):
    success, frame = cap.read()
    if success:
        processed_frame = gesture_processor.process_image(frame, True)
        out.write(processed_frame)
        img = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        imgtk = ImageTk.PhotoImage(image=img)
        canvas.imgtk = imgtk
        canvas.create_image(0, 0, anchor="nw", image=imgtk)
        canvas.after(10, process_video_frame, canvas, cap, gesture_processor, out)
    else:
        cap.release()
        out.release()
        cv2.destroyAllWindows()
        messagebox.showinfo("Info", "Processed video saved as processed_output.mp4")
        print("Processed video saved as processed_output.mp4")

+ 2
- 0
src_voice_tip/README.md View File

@@ -0,0 +1,2 @@
A Python-based real-time voice subtitle program that converts the user's speech into on-screen subtitle text as it is spoken. Supports both Chinese and English recognition.

+ 3
- 0
src_voice_tip/requirements.txt View File

@@ -0,0 +1,3 @@
vosk
sounddevice
numpy

+ 220
- 0
src_voice_tip/src/voice.py View File

@@ -0,0 +1,220 @@
import tkinter as tk
import threading
import queue
import time
import json
import sounddevice as sd
import numpy as np
from vosk import Model, KaldiRecognizer
import os
import platform

class VoiceSubtitleApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Real-time Voice Subtitles")
        # Window attributes: always on top, borderless, transparent
        if platform.system() == 'Darwin':  # macOS
            self.root.attributes('-topmost', 1)
            self.root.attributes('-alpha', 1.0)
            self.root.attributes('-transparent', True)
            self.root.configure(bg='black')  # black background
            self.root.wm_attributes('-transparent', True)
            self.root.update_idletasks()
            self.root.lift()
        else:  # Windows and other systems
            self.root.attributes('-topmost', True)
            self.root.attributes('-alpha', 1.0)
            self.root.configure(bg='black')
            self.root.overrideredirect(True)  # borderless mode
        # Window size and position
        self.window_width = 800
        self.window_height = 100
        screen_width = root.winfo_screenwidth()
        screen_height = root.winfo_screenheight()
        x = (screen_width - self.window_width) // 2
        y = screen_height - self.window_height - 100
        self.root.geometry(f"{self.window_width}x{self.window_height}+{x}+{y}")
        # Subtitle label
        self.text_label = tk.Label(
            root,
            text="",  # starts empty
            font=("Arial", 24, "bold"),
            fg="white",
            bg='black',  # black background
            wraplength=780,
            highlightthickness=0,
            borderwidth=0
        )
        self.text_label.pack(expand=True, fill='both', padx=10)
        # Dragging support
        self.text_label.bind('<Button-1>', self.start_move)
        self.text_label.bind('<B1-Motion>', self.on_move)
        # Right-click to quit
        self.text_label.bind('<Button-3>', lambda e: self.on_closing())
        self.is_running = True
        self.audio_queue = queue.Queue()
        self.partial_result = ""
        self.last_voice_time = time.time()
        try:
            # Initialize the Vosk model
            print("Loading the speech recognition model...")
            model_path = "../vosk-model-cn-0.22"
            if not os.path.exists(model_path):
                model_path = "../vosk-model-small-cn-0.22"
                print("Medium model not found; using the small model")
            if not os.path.exists(model_path):
                print(f"Error: model folder {model_path} not found")
                self.update_subtitle(f"Error: model folder {model_path} not found")
                return
            print(f"Using model: {model_path}")
            self.model = Model(model_path)
            self.recognizer = KaldiRecognizer(self.model, 16000)
            self.recognizer.SetMaxAlternatives(0)
            self.recognizer.SetWords(True)
            print("Model loaded")
            # List the available audio devices
            devices = sd.query_devices()
            print("Available audio devices:")
            for i, device in enumerate(devices):
                print(f"{i}: {device['name']}")
            # Use the default input device
            default_input = sd.query_devices(kind='input')
            print(f"Using default input device: {default_input['name']}")
            # Start the audio-capture thread
            self.audio_thread = threading.Thread(target=self.process_audio)
            self.audio_thread.daemon = True
            self.audio_thread.start()
            # Start the recognition thread
            self.recognition_thread = threading.Thread(target=self.recognize_speech)
            self.recognition_thread.daemon = True
            self.recognition_thread.start()
        except Exception as e:
            print(f"Initialization error: {str(e)}")
            self.update_subtitle(f"Initialization failed: {str(e)}")
            return
    def start_move(self, event):
        """Begin dragging the window"""
        self.x = event.x
        self.y = event.y

    def on_move(self, event):
        """Handle window dragging"""
        deltax = event.x - self.x
        deltay = event.y - self.y
        x = self.root.winfo_x() + deltax
        y = self.root.winfo_y() + deltay
        self.root.geometry(f"+{x}+{y}")

    def audio_callback(self, indata, frames, time, status):
        """Audio stream callback"""
        if status:
            print(status)
        self.audio_queue.put(bytes(indata))

    def process_audio(self):
        """Run the audio input stream"""
        try:
            with sd.RawInputStream(samplerate=16000, channels=1, dtype='int16',
                                   blocksize=4000,
                                   device=None,
                                   callback=self.audio_callback):
                print("Recording started...")
                while self.is_running:
                    time.sleep(0.05)
                    self.root.after(0, self.fade_out_text)
        except Exception as e:
            print(f"Audio processing error: {str(e)}")
            self.root.after(0, self.update_subtitle, f"Audio processing error: {str(e)}")

    def recognize_speech(self):
        """Speech recognition loop"""
        print("Recognition started...")
        while self.is_running:
            try:
                audio_data = self.audio_queue.get(timeout=0.5)
                if len(audio_data) == 0:
                    continue
                if self.recognizer.AcceptWaveform(audio_data):
                    result = json.loads(self.recognizer.Result())
                    text = result.get("text", "").strip()
                    if text:
                        print(f"Final result: {text}")
                        self.last_voice_time = time.time()
                        self.root.after(0, self.update_subtitle, text)
                else:
                    partial = json.loads(self.recognizer.PartialResult())
                    partial_text = partial.get("partial", "").strip()
                    if partial_text and partial_text != self.partial_result:
                        self.partial_result = partial_text
                        print(f"Partial result: {partial_text}")
                        self.last_voice_time = time.time()
                        self.root.after(0, self.update_subtitle, partial_text)
            except queue.Empty:
                continue
            except Exception as e:
                print(f"Recognition error: {str(e)}")
                time.sleep(0.1)

    def fade_out_text(self):
        """Fade the subtitle text out"""
        try:
            if time.time() - self.last_voice_time > 3:  # start fading after 3 s without input
                current_color = self.text_label.cget('fg')
                if current_color == 'white':  # fully opaque
                    self.text_label.configure(fg='#FFFFFF')  # switch to an explicit hex value
                else:
                    # Parse the current color value
                    color = current_color.lstrip('#')
                    if len(color) == 6:  # make sure it is a valid hex color
                        # Darken toward the black background, which reads as lowering opacity
                        new_alpha = max(0, int(color[0:2], 16) - 15)
                        if new_alpha > 0:  # not yet fully faded
                            new_color = f'#{new_alpha:02x}{new_alpha:02x}{new_alpha:02x}'
                            self.text_label.configure(fg=new_color)
                            self.root.after(50, self.fade_out_text)  # keep fading
                        else:
                            # Clear the text once fully faded
                            self.text_label.config(text="")
                            self.text_label.update()
        except Exception as e:
            print(f"Fade effect error: {str(e)}")

    def update_subtitle(self, text):
        """Update the subtitle text"""
        if not text:
            return
        self.text_label.config(text=text)
        self.text_label.configure(fg='white')
        self.text_label.update()
        self.last_voice_time = time.time()

    def on_closing(self):
        self.is_running = False
        self.root.destroy()

if __name__ == "__main__":
    root = tk.Tk()
    app = VoiceSubtitleApp(root)
    root.mainloop()
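For reference, a small sketch (not part of the commit) of the arithmetic behind fade_out_text: each 50 ms tick subtracts 15 from the 8-bit grayscale value, so a fully white subtitle fades to black in roughly 0.85 s:

alpha = 0xFF  # '#FFFFFF' -> fully white
steps = 0
while alpha > 0:
    alpha = max(0, alpha - 15)
    steps += 1
print(steps)  # 17 ticks x 50 ms ~= 0.85 s from white to invisible-on-black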

+ 6
- 0
src_voice_tip/vosk-model-small-cn-0.22/README View File

@@ -0,0 +1,6 @@
Chinese Vosk model for mobile
CER results
23.54% speechio_02
38.29% speechio_06

BIN
src_voice_tip/vosk-model-small-cn-0.22/am/final.mdl View File


+ 8
- 0
src_voice_tip/vosk-model-small-cn-0.22/conf/mfcc.conf View File

@@ -0,0 +1,8 @@
--use-energy=false
--sample-frequency=16000
--num-mel-bins=40
--num-ceps=40
--low-freq=40
--high-freq=-200
--allow-upsample=true
--allow-downsample=true

+ 10
- 0
src_voice_tip/vosk-model-small-cn-0.22/conf/model.conf View File

@@ -0,0 +1,10 @@
--min-active=200
--max-active=5000
--beam=12.0
--lattice-beam=4.0
--acoustic-scale=1.0
--frame-subsampling-factor=3
--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
--endpoint.rule2.min-trailing-silence=0.5
--endpoint.rule3.min-trailing-silence=1.0
--endpoint.rule4.min-trailing-silence=2.0

BIN
src_voice_tip/vosk-model-small-cn-0.22/graph/Gr.fst View File


BIN
src_voice_tip/vosk-model-small-cn-0.22/graph/HCLr.fst View File


+ 39
- 0
src_voice_tip/vosk-model-small-cn-0.22/graph/disambig_tid.int View File

@@ -0,0 +1,39 @@
11845
11846
11847
11848
11849
11850
11851
11852
11853
11854
11855
11856
11857
11858
11859
11860
11861
11862
11863
11864
11865
11866
11867
11868
11869
11870
11871
11872
11873
11874
11875
11876
11877
11878
11879
11880
11881
11882
11883

+ 646
- 0
src_voice_tip/vosk-model-small-cn-0.22/graph/phones/word_boundary.int View File

@@ -0,0 +1,646 @@
1 nonword
2 begin
3 end
4 internal
5 singleton
6 nonword
7 begin
8 end
9 internal
10 singleton
11 begin
12 end
13 internal
14 singleton
15 begin
16 end
17 internal
18 singleton
19 begin
20 end
21 internal
22 singleton
23 begin
24 end
25 internal
26 singleton
27 begin
28 end
29 internal
30 singleton
31 begin
32 end
33 internal
34 singleton
35 begin
36 end
37 internal
38 singleton
39 begin
40 end
41 internal
42 singleton
43 begin
44 end
45 internal
46 singleton
47 begin
48 end
49 internal
50 singleton
51 begin
52 end
53 internal
54 singleton
55 begin
56 end
57 internal
58 singleton
59 begin
60 end
61 internal
62 singleton
63 begin
64 end
65 internal
66 singleton
67 begin
68 end
69 internal
70 singleton
71 begin
72 end
73 internal
74 singleton
75 begin
76 end
77 internal
78 singleton
79 begin
80 end
81 internal
82 singleton
83 begin
84 end
85 internal
86 singleton
87 begin
88 end
89 internal
90 singleton
91 begin
92 end
93 internal
94 singleton
95 begin
96 end
97 internal
98 singleton
99 begin
100 end
101 internal
102 singleton
103 begin
104 end
105 internal
106 singleton
107 begin
108 end
109 internal
110 singleton
111 begin
112 end
113 internal
114 singleton
115 begin
116 end
117 internal
118 singleton
119 begin
120 end
121 internal
122 singleton
123 begin
124 end
125 internal
126 singleton
127 begin
128 end
129 internal
130 singleton
131 begin
132 end
133 internal
134 singleton
135 begin
136 end
137 internal
138 singleton
139 begin
140 end
141 internal
142 singleton
143 begin
144 end
145 internal
146 singleton
147 begin
148 end
149 internal
150 singleton
151 begin
152 end
153 internal
154 singleton
155 begin
156 end
157 internal
158 singleton
159 begin
160 end
161 internal
162 singleton
163 begin
164 end
165 internal
166 singleton
167 begin
168 end
169 internal
170 singleton
171 begin
172 end
173 internal
174 singleton
175 begin
176 end
177 internal
178 singleton
179 begin
180 end
181 internal
182 singleton
183 begin
184 end
185 internal
186 singleton
187 begin
188 end
189 internal
190 singleton
191 begin
192 end
193 internal
194 singleton
195 begin
196 end
197 internal
198 singleton
199 begin
200 end
201 internal
202 singleton
203 begin
204 end
205 internal
206 singleton
207 begin
208 end
209 internal
210 singleton
211 begin
212 end
213 internal
214 singleton
215 begin
216 end
217 internal
218 singleton
219 begin
220 end
221 internal
222 singleton
223 begin
224 end
225 internal
226 singleton
227 begin
228 end
229 internal
230 singleton
231 begin
232 end
233 internal
234 singleton
235 begin
236 end
237 internal
238 singleton
239 begin
240 end
241 internal
242 singleton
243 begin
244 end
245 internal
246 singleton
247 begin
248 end
249 internal
250 singleton
251 begin
252 end
253 internal
254 singleton
255 begin
256 end
257 internal
258 singleton
259 begin
260 end
261 internal
262 singleton
263 begin
264 end
265 internal
266 singleton
267 begin
268 end
269 internal
270 singleton
271 begin
272 end
273 internal
274 singleton
275 begin
276 end
277 internal
278 singleton
279 begin
280 end
281 internal
282 singleton
283 begin
284 end
285 internal
286 singleton
287 begin
288 end
289 internal
290 singleton
291 begin
292 end
293 internal
294 singleton
295 begin
296 end
297 internal
298 singleton
299 begin
300 end
301 internal
302 singleton
303 begin
304 end
305 internal
306 singleton
307 begin
308 end
309 internal
310 singleton
311 begin
312 end
313 internal
314 singleton
315 begin
316 end
317 internal
318 singleton
319 begin
320 end
321 internal
322 singleton
323 begin
324 end
325 internal
326 singleton
327 begin
328 end
329 internal
330 singleton
331 begin
332 end
333 internal
334 singleton
335 begin
336 end
337 internal
338 singleton
339 begin
340 end
341 internal
342 singleton
343 begin
344 end
345 internal
346 singleton
347 begin
348 end
349 internal
350 singleton
351 begin
352 end
353 internal
354 singleton
355 begin
356 end
357 internal
358 singleton
359 begin
360 end
361 internal
362 singleton
363 begin
364 end
365 internal
366 singleton
367 begin
368 end
369 internal
370 singleton
371 begin
372 end
373 internal
374 singleton
375 begin
376 end
377 internal
378 singleton
379 begin
380 end
381 internal
382 singleton
383 begin
384 end
385 internal
386 singleton
387 begin
388 end
389 internal
390 singleton
391 begin
392 end
393 internal
394 singleton
395 begin
396 end
397 internal
398 singleton
399 begin
400 end
401 internal
402 singleton
403 begin
404 end
405 internal
406 singleton
407 begin
408 end
409 internal
410 singleton
411 begin
412 end
413 internal
414 singleton
415 begin
416 end
417 internal
418 singleton
419 begin
420 end
421 internal
422 singleton
423 begin
424 end
425 internal
426 singleton
427 begin
428 end
429 internal
430 singleton
431 begin
432 end
433 internal
434 singleton
435 begin
436 end
437 internal
438 singleton
439 begin
440 end
441 internal
442 singleton
443 begin
444 end
445 internal
446 singleton
447 begin
448 end
449 internal
450 singleton
451 begin
452 end
453 internal
454 singleton
455 begin
456 end
457 internal
458 singleton
459 begin
460 end
461 internal
462 singleton
463 begin
464 end
465 internal
466 singleton
467 begin
468 end
469 internal
470 singleton
471 begin
472 end
473 internal
474 singleton
475 begin
476 end
477 internal
478 singleton
479 begin
480 end
481 internal
482 singleton
483 begin
484 end
485 internal
486 singleton
487 begin
488 end
489 internal
490 singleton
491 begin
492 end
493 internal
494 singleton
495 begin
496 end
497 internal
498 singleton
499 begin
500 end
501 internal
502 singleton
503 begin
504 end
505 internal
506 singleton
507 begin
508 end
509 internal
510 singleton
511 begin
512 end
513 internal
514 singleton
515 begin
516 end
517 internal
518 singleton
519 begin
520 end
521 internal
522 singleton
523 begin
524 end
525 internal
526 singleton
527 begin
528 end
529 internal
530 singleton
531 begin
532 end
533 internal
534 singleton
535 begin
536 end
537 internal
538 singleton
539 begin
540 end
541 internal
542 singleton
543 begin
544 end
545 internal
546 singleton
547 begin
548 end
549 internal
550 singleton
551 begin
552 end
553 internal
554 singleton
555 begin
556 end
557 internal
558 singleton
559 begin
560 end
561 internal
562 singleton
563 begin
564 end
565 internal
566 singleton
567 begin
568 end
569 internal
570 singleton
571 begin
572 end
573 internal
574 singleton
575 begin
576 end
577 internal
578 singleton
579 begin
580 end
581 internal
582 singleton
583 begin
584 end
585 internal
586 singleton
587 begin
588 end
589 internal
590 singleton
591 begin
592 end
593 internal
594 singleton
595 begin
596 end
597 internal
598 singleton
599 begin
600 end
601 internal
602 singleton
603 begin
604 end
605 internal
606 singleton
607 begin
608 end
609 internal
610 singleton
611 begin
612 end
613 internal
614 singleton
615 begin
616 end
617 internal
618 singleton
619 begin
620 end
621 internal
622 singleton
623 begin
624 end
625 internal
626 singleton
627 begin
628 end
629 internal
630 singleton
631 begin
632 end
633 internal
634 singleton
635 begin
636 end
637 internal
638 singleton
639 begin
640 end
641 internal
642 singleton
643 begin
644 end
645 internal
646 singleton

BIN
src_voice_tip/vosk-model-small-cn-0.22/ivector/final.dubm View File


BIN
src_voice_tip/vosk-model-small-cn-0.22/ivector/final.ie View File


BIN
src_voice_tip/vosk-model-small-cn-0.22/ivector/final.mat View File


+ 3
- 0
src_voice_tip/vosk-model-small-cn-0.22/ivector/global_cmvn.stats View File

@@ -0,0 +1,3 @@
[
1.117107e+11 -7.827721e+08 -1.101398e+10 -2.193934e+09 -1.347332e+10 -1.613916e+10 -1.199561e+10 -1.255081e+10 -1.638895e+10 -3.821099e+09 -1.372833e+10 -5.244242e+09 -1.098187e+10 -3.655235e+09 -9.364579e+09 -4.285302e+09 -6.296873e+09 -1.552953e+09 -3.176746e+09 -1.202976e+08 -9.857023e+08 2.316555e+08 -1.61059e+08 -5.891868e+07 3.465849e+08 -1.842054e+08 3.248211e+08 -1.483965e+08 3.739239e+08 -6.672061e+08 4.442288e+08 -9.274889e+08 5.142684e+08 4.292036e+07 2.206386e+08 -4.532715e+08 -2.092499e+08 -3.70488e+08 -8.079404e+07 -8.425977e+07 1.344125e+09
9.982632e+12 1.02635e+12 8.634624e+11 9.06451e+11 9.652096e+11 1.12772e+12 9.468372e+11 9.141218e+11 9.670484e+11 6.936961e+11 8.141006e+11 6.256321e+11 6.087707e+11 4.616898e+11 4.212042e+11 2.862872e+11 2.498089e+11 1.470856e+11 1.099197e+11 5.780894e+10 3.118114e+10 1.060667e+10 1.466199e+09 4.173056e+08 5.257362e+09 1.277714e+10 2.114478e+10 2.974502e+10 3.587691e+10 4.078971e+10 4.247745e+10 4.382608e+10 4.62521e+10 4.575282e+10 3.546206e+10 3.041531e+10 2.838562e+10 2.258604e+10 1.715295e+10 1.303227e+10 0 ]

+ 0
- 0
src_voice_tip/vosk-model-small-cn-0.22/ivector/online_cmvn.conf View File


+ 2
- 0
src_voice_tip/vosk-model-small-cn-0.22/ivector/splice.conf View File

@@ -0,0 +1,2 @@
--left-context=3
--right-context=3
