网站首页 > 技术文章正文

利用神经网络模型检测摄像头上的可疑行为

nanyue 2024-10-01 13:08:49 技术文章 36 ℃

您可能想知道如何检测网络摄像头视频流中的可疑行为。我们将使用计算机的网络摄像头作为视频源，既用它采集训练数据，也用它测试神经网络模型。这种方法属于基于迁移学习的监督学习。

你需要准备什么

您应该可以访问安装了以下组件的计算机。

Python 3
Keras/Tensorflow
Pillow （PIL）
NumPy
CV2

它们都可以通过 pip 或 conda 安装。

虽然我只在 Mac 上测试过这段 Python 代码，但它应该适用于任何系统。唯一的例外是文字转语音功能：我使用 subprocess.call() 来调用 Mac OS X 的 say 命令。您的操作系统上可能有等效的命令。

导入Python库

# Create training videos
import cv2
import numpy as np
from time import sleep
import glob
import os
import sys
from PIL import Image
import subprocess
# Recording settings: frames captured per clip and clips recorded per class.
NUM_FRAMES = 100
TAKES_PER = 2
# Class labels; NEG_IDX / POS_IDX are the positions of the two labels.
CLASSES = ['SAFE', 'DANGER']
NEG_IDX = 0
POS_IDX = 1
# Classifier settings.
MODEL_PATH = 'model.h5'
TRAIN_MODEL = True
EPOCHS = 10
HIDDEN_SIZE = 16

准备数据

首先，我们需要一些用于学习的训练数据。我们需要“可疑”行为和“安全”行为的视频，所以请准备好表演！为了让模型更容易训练，您可以拿一把玩具枪或其他容易识别的物品来表演“可疑”场景。这样，即使没有大量训练数据，模型也更容易区分这两种情况。

这是一段Python代码片段，可从计算机的网络摄像头中捕获四个视频（两个可疑和两个安全），并将它们存储在一个data目录中供以后处理。

def capture(num_frames, path='out.avi'):
    """Record frames from the default webcam into an MJPG video file.

    Args:
        num_frames: Number of frames to try to read from the camera. Frames
            that fail to read are skipped, so the file may hold fewer.
        path: Destination path of the video file.

    Returns:
        None. If the camera cannot be opened, a message is printed and
        nothing is recorded.
    """
    # Device index 0 selects the system's default camera.
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Unable to read camera feed")
        cap.release()
        return

    out = None
    try:
        # The default resolution is system dependent and reported as a
        # float, so convert it to int for the writer.
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # MJPG codec at 10 frames per second.
        out = cv2.VideoWriter(
            path,
            cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
            10,
            (frame_width, frame_height),
        )
        print('Recording started')
        for _ in range(num_frames):
            ret, frame = cap.read()
            if ret:
                out.write(frame)
    finally:
        # Always free the camera and finalize the file, even on error.
        cap.release()
        if out is not None:
            out.release()
 
# Record TAKES_PER clips of NUM_FRAMES frames for every class.
# Make sure the output directory exists before recording into it.
os.makedirs('data', exist_ok=True)
for take in range(TAKES_PER):
    for cla in CLASSES:
        path = 'data/{}{}.avi'.format(cla, take)
        print('Get ready to act:', cla)
        # Spoken prompt; the `say` command only works on Mac.
        subprocess.call(['say', 'get ready to act {}'.format(cla)])
        capture(NUM_FRAMES, path=path)

看看data目录中的视频。视频文件根据类别命名，例如SAFE1.avi是一个安全类别的视频。

使用预训练的模型从视频中提取特征

接下来，您需要将这些视频转换为机器学习算法可以训练的内容。为此，我们将重新利用经过预训练的VGG16网络，该神经网络已在ImageNet上接受过训练。Python实现如下：

# Create X, y series
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
import numpy as np
class VGGFramePreprocessor():
    """Turn a single video frame into a feature row using a VGG model."""

    def __init__(self, vgg_model):
        """Store the model whose ``predict`` output is used as features.

        Args:
            vgg_model: Model object exposing ``predict(batch)``.
        """
        self.vgg_model = vgg_model

    def process(self, frame):
        """Return the model's features for ``frame`` as a (1, n) array.

        Args:
            frame: Image array as produced by ``cv2.VideoCapture.read``.

        Returns:
            A 2-D array with one row holding the flattened model output.
        """
        # Resize to 224x224 and add a leading batch axis of size 1.
        img_data = cv2.resize(frame, (224, 224))
        img_data = np.expand_dims(img_data, axis=0)
        img_data = preprocess_input(img_data)
        # Flatten the prediction, then restore a batch axis so rows from
        # several frames can be concatenated.
        x = self.vgg_model.predict(img_data).flatten()
        x = np.expand_dims(x, axis=0)
        return x
def get_video_frames(video_path):
    """Yield the frames of a video file one at a time, in order.

    Args:
        video_path: Path of the video file to read.

    Yields:
        Each frame returned by ``cv2.VideoCapture.read`` until a read fails.
    """
    vidcap = cv2.VideoCapture(video_path)
    try:
        success, frame = vidcap.read()
        while success:
            yield frame
            success, frame = vidcap.read()
    finally:
        # Release the handle even if the consumer stops iterating early
        # or raises while processing a frame.
        vidcap.release()
# VGG16 with ImageNet weights and without its top classification layers,
# used here purely as a feature extractor.
frame_preprocessor = VGGFramePreprocessor(VGG16(weights='imagenet', include_top=False))

if TRAIN_MODEL:
    # Load the recorded videos and transform every frame into features.
    X = []
    y = []
    for video_path in glob.glob('data/*.avi'):
        print('preprocessing', video_path)
        # Derive the label from the file name only, so a directory name
        # containing the class label cannot mislabel a video.
        positive = CLASSES[POS_IDX] in os.path.basename(video_path)
        features = [frame_preprocessor.process(frame) for frame in get_video_frames(video_path)]
        if not features:
            # np.concatenate raises on an empty list; skip unreadable videos.
            print('skipping (no readable frames)', video_path)
            continue
        _X = np.concatenate(features)
        # One one-hot row [negative, positive] per frame of this video.
        _y = np.array(_X.shape[0] * [[int(not positive), int(positive)]])
        X.append(_X)
        y.append(_y)
    if not X:
        raise ValueError('No training frames found in data/*.avi; record the training videos first')
    X = np.concatenate(X)
    y = np.concatenate(y)
    print(X.shape)
    print(y.shape)

训练分类器

现在我们有了X和Y序列，现在是时候训练神经网络模型来区分可疑行为和安全行为了！在此示例中，我们将使用深度神经网络。你可以根据需要进行调整。Python代码如下：

from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
MODEL_PATH = 'model.h5'
EPOCHS = 10
HIDDEN_SIZE = 16
if TRAIN_MODEL:
    # Small fully connected classifier on top of the extracted features.
    # The hidden layers need a non-linear activation: Dense defaults to a
    # linear activation, and stacked linear layers collapse into one.
    model = Sequential()
    model.add(Dense(HIDDEN_SIZE, activation='relu', input_shape=(X.shape[1],)))
    model.add(Dense(HIDDEN_SIZE, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(len(CLASSES), activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    # Hold out a test split for the F1 score; fit() additionally keeps
    # 10% of the training split for validation.
    x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=42)
    model.fit(x_train, y_train,
              batch_size=10, epochs=EPOCHS,
              validation_split=0.1)
    model.save(MODEL_PATH)
    # Convert one-hot targets and softmax outputs to class indices.
    y_true = [np.argmax(target) for target in y_test]
    y_pred = [np.argmax(pred) for pred in model.predict(x_test)]
    score = f1_score(y_true, y_pred)
    print('F1:', score)
else:
    # Reuse a previously trained model instead of training again.
    model = load_model(MODEL_PATH)

准备测试！

现在到了有趣的部分。现在我们将使用我们构建的所有部分。是时候将计算机的网络摄像头变成现场CCTV行为检测器了！

# Infer on live video
from math import ceil
import subprocess
TEST_FRAMES = 500
# Initialize camera (device index 0 is the default camera).
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # Without a camera every read would fail, so skip the loop.
        print("Unable to read camera feed")
    else:
        # Classify up to TEST_FRAMES frames from the live feed.
        for i in range(TEST_FRAMES):
            ret, frame = cap.read()
            if not ret:
                continue
            x_pred = frame_preprocessor.process(frame)
            y_pred = model.predict(x_pred)[0]
            conf_negative = y_pred[NEG_IDX]
            conf_positive = y_pred[POS_IDX]
            cla = CLASSES[np.argmax(y_pred)]
            if cla == CLASSES[POS_IDX]:
                # Spoken alert; the `say` command only works on Mac.
                subprocess.call(['say', CLASSES[POS_IDX]])
            progress = int(100 * (i / TEST_FRAMES))
            # The trailing '\r' rewrites the same console line each frame.
            message = 'testing {}% conf_neg = {:.02f} conf_pos = {:.02f} class = {} \r'.format(progress, conf_negative, conf_positive, cla)
            sys.stdout.write(message)
            sys.stdout.flush()
finally:
    # Free the camera even if prediction raises.
    cap.release()

结论

我希望你喜欢这个关于检测CCTV视频中可疑行为的教程。

一个需要做出的选择是：在单帧上训练，还是在帧序列上训练。为了简单起见，我在这个示例中选择了单帧，这样可以跳过一些与主题无关的任务，例如缓冲图像和对训练数据排序。如果你想在帧序列上训练，可以使用LSTM。

上一篇：使用神经网络的自动化特征工程（神经网络的特点及使用场景）
下一篇： RealPython 基础教程:Python 字典用法详解

网站首页 > 技术文章 正文