This library is built in the open.
If you spot an error, have a suggestion, or just want to say hello — we’d love to hear from you.

Below is the code for loading and transforming the data, converting it from JSON files into a manageable data format.
import numpy as np import pandas as pd import json import matplotlib.pyplotines as plt import seaborn as sns import plotly.graph_objects as go from matplotlib.colors import ListedColormap import matplotlib.patches as mpatches def load_and_transform_json(filename): with open(filename, 'r') as file: file_json = json.load(file) data = [] for item in file_json: frame_number = item['frame_number'] for prediction in item['predictions']: label = prediction['label'] coordinates = prediction['coordinates'] data.append({ 'frame_number': frame_number, 'label': label, 'x': coordinates['x'], 'y': coordinates['y'], 'width': coordinates['width'], 'height': coordinates['height'] }) df = pd.DataFrame(data) return df # Loading the data df_clip_1 = load_and_transform_json('clip_1.json') df_clip_2 = load_and_transform_json('clip_2.json') df_clip_3 = load_and_transform_json('clip_3.json') df_clip_4 = load_and_transform_json('clip_4.json') df_clip_5 = load_and_transform_json('clip_5.json')| frame_number | label | x | y | width | height | |
|---|---|---|---|---|---|---|
| 0 | 1 | B-person | 461.0 | 313.0 | 122 | 250 |
| 1 | 2 | B-person | 462.5 | 313.0 | 121 | 252 |
| 2 | 3 | B-person | 462.0 | 312.0 | 124 | 254 |
| 3 | 4 | B-person | 461.0 | 311.5 | 122 | 255 |
| 4 | 5 | B-person | 461.0 | 311.5 | 122 | 255 |
It can be observed that not all clips have the same number of frames.
def plot_heatmap(df, title): # Defining image size (640x640) width, height = 640, 640 heatmap = np.zeros((height, width)) for index, row in df.iterrows(): x_center = int(row['x']) y_center = int(row['y']) w = int(row['width']) h = int(row['height']) # Calculating bounding box limits x_min = max(0, x_center - w // 2) x_max = min(width, x_center + w // 2) y_min = max(0, y_center - h // 2) y_max = min(height, y_center + h // 2) # Incrementing the value in the bounding box region heatmap[y_min:y_max, x_min:x_max] += 1 plt.figure(figsize=(8, 8)) sns.heatmap(heatmap, cmap='hot', cbar=True) plt.title(title) plt.xlabel('X') plt.ylabel('Y') plt.gca().invert_yaxis() # Inverting Y-axis to match image coordinates plt.show() # Plotting heatmaps for each clip plot_heatmap(df_clip_1, "Heatmap - Clip 1") plot_heatmap(df_clip_2, "Heatmap - Clip 2") plot_heatmap(df_clip_3, "Heatmap - Clip 3") plot_heatmap(df_clip_4, "Heatmap - Clip 4") plot_heatmap(df_clip_5, "Heatmap - Clip 5")Here we can see the areas where the subjects are concentrated for the longest time in each clip.
In clip 1, there is significant activity in the center of the image.
In clip 2, the majority of activity is concentrated at a specific point on the right side of the image.
In clip 3, the activity is similar to clip 1, though slightly more scattered and less dense.
In clip 4, activity is concentrated on the right, in a small area.
In clip 5, activity is minimal and concentrated at a single point in the center of the image.
A key aspect of detection model quality is its ability to track an object continuously. If the model "loses" an object in one frame but recovers it in the next, this indicates instability.
def analyze_consistency(df, clip_name): total_frames = df['frame_number'].max() labels = df['label'].unique() print(f"Consistency analysis for {clip_name}:") for label in labels: label_df = df[df['label'] == label] frames_present = label_df['frame_number'].nunique() consistency = (frames_present / total_frames) * 100 print(f" - Label '{label}': Present in {frames_present}/{total_frames} frames ({consistency:.2f}%)") print("-" * 30) analyze_consistency(df_clip_1, "Clip 1") analyze_consistency(df_clip_2, "Clip 2") analyze_consistency(df_clip_3, "Clip 3") analyze_consistency(df_clip_4, "Clip 4") analyze_consistency(df_clip_5, "Clip 5")It can be observed that in all 5 clips, the person is detected in 100% of the frames.
Sudden variations in bounding box size may indicate unstable detection.
def plot_box_size_variability(df, title): df['area'] = df['width'] * df['height'] plt.figure(figsize=(10, 4)) sns.lineplot(data=df, x='frame_number', y='area', hue='label') plt.title(f"Bounding Box Size Variability - {title}") plt.xlabel("Frame") plt.ylabel("Area (pixels)") plt.show() plot_box_size_variability(df_clip_1, "Clip 1") plot_box_size_variability(df_clip_2, "Clip 2") plot_box_size_variability(df_clip_3, "Clip 3") plot_box_size_variability(df_clip_4, "Clip 4") plot_box_size_variability(df_clip_5, "Clip 5")In all 5 clips, it can be observed that the area of the detected person remains constant overall, but there is significant noise in the detection, as evidenced by many peaks and valleys in the area-per-frame graph. A stable model should exhibit a much smoother area curve.
Plotting the center (x, y) over time helps visualize whether the detected movement is fluid or jittery.
def plot_trajectory(df, title):
    """Scatter the detection centers (x, y) of one clip over time.

    Points are colored by frame number (viridis palette), so the color
    gradient reveals the direction of movement. The y-axis is inverted so
    the plot matches image coordinates (origin at the top-left).
    """
    plt.figure(figsize=(8, 8))
    sns.scatterplot(data=df, x='x', y='y', hue='frame_number', palette='viridis', legend=None)
    ax = plt.gca()
    ax.set_title(f"Detection Trajectory - {title}")
    ax.set_xlim(0, 640)
    ax.set_ylim(0, 640)
    ax.invert_yaxis()
    plt.show()


plot_trajectory(df_clip_1, "Clip 1")
plot_trajectory(df_clip_2, "Clip 2")