#!/bin/bash
# Autonomous PTZ Camera Scanner
# Systematically scans room areas with AI visual analysis
#
# Usage: ./autonomous_scan.sh [OPTIONS] "search objective"
#
# @license MIT
# --- Runtime configuration -------------------------------------------------
# Each of the first four settings takes its value from an environment
# variable when that variable is set and non-empty, otherwise the default.

# Video device handed to v4l2-ctl (-d) and ffmpeg (-i).
DEVICE=${OBSBOT_DEVICE:-/dev/video0}
# Directory that receives the captured JPEG frames.
OUTPUT_DIR=${OBSBOT_OUTPUT_DIR:-/tmp/obsbot_captures}
# Chat-completions endpoint the analysis request is POSTed to.
LM_STUDIO_URL=${LM_STUDIO_URL:-http://localhost:1234/v1/chat/completions}
# Value sent as the "model" field of the analysis request.
VL_MODEL=${OBSBOT_VL_MODEL:-qwen2.5-vl-7b-instruct}

# Upper bound on the number of positions visited in one scan.
MAX_ITERATIONS=15
# Minimum reported confidence for a detection to count as a found target.
CONFIDENCE_THRESHOLD=0.7
# Seconds to sleep after each successful camera move.
DELAY_BETWEEN_MOVES=2
# Exploration patterns - PTZ positions.
# Every entry is one comma-separated string "pan,tilt,zoom,zone_name"; the
# scan loop splits it on commas, feeds the first three fields to the camera
# controls and uses the fourth as a label in logs and capture file names.

# Default pattern: 13 positions, visited in the order listed.
DEFAULT_SEQUENCE=(
  # tilt 0, moderate pan
  '0,0,2,CENTER'
  '250000,0,2,LEFT_CENTER' '-250000,0,2,RIGHT_CENTER'
  # positive tilt
  '350000,100000,1,FAR_LEFT_UP' '-350000,100000,1,FAR_RIGHT_UP'
  '0,200000,2,HIGH_CENTER'
  '300000,150000,1,HIGH_LEFT' '-300000,150000,1,HIGH_RIGHT'
  # negative tilt
  '200000,-100000,2,LOW_LEFT' '-200000,-100000,2,LOW_RIGHT'
  '0,-150000,2,LOW_CENTER'
  # tilt 0, largest pan values
  '400000,0,1,EXTREME_LEFT' '-400000,0,1,EXTREME_RIGHT'
)
# Grid pattern: nine "pan,tilt,zoom,zone_name" entries forming a 3x3 grid,
# three pan values (-400000, 0, 400000) for each of three tilt values.
GRID_SEQUENCE=(
  # tilt = -200000
  '-400000,-200000,1,GRID_1'
  '0,-200000,1,GRID_2'
  '400000,-200000,1,GRID_3'
  # tilt = 0
  '-400000,0,1,GRID_4'
  '0,0,2,GRID_5'
  '400000,0,1,GRID_6'
  # tilt = 200000
  '-400000,200000,1,GRID_7'
  '0,200000,1,GRID_8'
  '400000,200000,1,GRID_9'
)
# Quick pattern: five "pan,tilt,zoom,zone_name" entries, all at zoom 1 -
# the centre position followed by one offset in each pan and tilt direction.
QUICK_SEQUENCE=(
  '0,0,1,CENTER'
  '300000,0,1,LEFT' '-300000,0,1,RIGHT'
  '0,150000,1,UP' '0,-150000,1,DOWN'
)
#######################################
# Print the help text to stdout.
# Globals:   MAX_ITERATIONS, CONFIDENCE_THRESHOLD, DELAY_BETWEEN_MOVES,
#            OUTPUT_DIR, DEVICE (read; shown as the current defaults)
# Arguments: none
# Outputs:   usage, options, environment variables and examples
#######################################
show_usage() {
  local self=$0

  # One argument per output line; literal lines are single-quoted, lines
  # that embed a current setting or the script name are double-quoted.
  printf '%s\n' \
    'Autonomous PTZ Camera Scanner' \
    'USAGE:' \
    "$self [OPTIONS] \"search objective\"" \
    'OPTIONS:' \
    '-p, --pattern PATTERN Pattern: default, grid, quick (default: default)' \
    "-i, --iterations NUM Maximum iterations (default: $MAX_ITERATIONS)" \
    "-c, --confidence NUM Confidence threshold 0.0-1.0 (default: $CONFIDENCE_THRESHOLD)" \
    "-d, --delay NUM Delay between moves in seconds (default: $DELAY_BETWEEN_MOVES)" \
    "-o, --output DIR Output directory (default: $OUTPUT_DIR)" \
    '-v, --verbose Verbose output' \
    "--device PATH Video device (default: $DEVICE)" \
    '--no-analysis Skip AI analysis' \
    '--center Return to center after completion' \
    '-h, --help Show this help' \
    'ENVIRONMENT VARIABLES:' \
    'OBSBOT_DEVICE Video device path' \
    'OBSBOT_OUTPUT_DIR Capture output directory' \
    'LM_STUDIO_URL LM Studio API endpoint' \
    'OBSBOT_VL_MODEL Vision-language model name' \
    'EXAMPLES:' \
    "$self \"find the open window\"" \
    "$self --pattern grid --verbose \"locate any books\"" \
    "$self --pattern quick --confidence 0.5 \"identify electronic devices\""
}
#######################################
# Write one timestamped log line to stdout.
# ERROR and INFO lines are always written; any other level is written only
# when VERBOSE is "true".
# Globals:   VERBOSE (read)
# Arguments: $1 - level label; remaining arguments - message words
# Outputs:   "[HH:MM:SS] [LEVEL] message" on stdout, or nothing
#######################################
log_message() {
  local severity=$1
  shift
  local text="$*"
  local now
  now=$(date '+%H:%M:%S')

  local emit=false
  case "$severity" in
    ERROR | INFO) emit=true ;;
    *) [ "$VERBOSE" = true ] && emit=true ;;
  esac

  if [ "$emit" = true ]; then
    printf '[%s] [%s] %s\n' "$now" "$severity" "$text"
  fi
}
#######################################
# Create the capture output directory (and any missing parents).
# Globals:   OUTPUT_DIR (read)
# Arguments: none
# Returns:   the exit status of mkdir
#######################################
setup_environment() {
  # "--" ends option parsing so a directory name beginning with "-" is
  # treated as a path rather than as mkdir options.
  mkdir -p -- "$OUTPUT_DIR"
}
#######################################
# Move the camera to an absolute pan/tilt/zoom position, then wait.
# Globals:   DEVICE (read), DELAY_BETWEEN_MOVES (read)
# Arguments: $1 - pan_absolute value
#            $2 - tilt_absolute value
#            $3 - zoom_absolute value
#            $4 - zone name, used only in the log line
# Returns:   0 after a successful move and the delay, 1 if v4l2-ctl failed
#######################################
execute_movement() {
  local pan=$1 tilt=$2 zoom=$3 zone_name=$4

  log_message "INFO" "Moving to $zone_name: PAN=$pan, TILT=$tilt, ZOOM=$zoom"

  # Quoted so a device path or control value is always passed as a single
  # argument; v4l2-ctl's own stderr is discarded in favour of the log line.
  if ! v4l2-ctl -d "$DEVICE" \
    --set-ctrl="pan_absolute=$pan,tilt_absolute=$tilt,zoom_absolute=$zoom" \
    2>/dev/null; then
    log_message "ERROR" "Failed to move camera"
    return 1
  fi

  # Pause after the move before the caller continues.
  sleep "$DELAY_BETWEEN_MOVES"
  return 0
}
#######################################
# Capture one frame from the camera and, unless analysis is disabled, send
# it to the vision-language model and evaluate the structured answer.
# Globals:   DEVICE, OUTPUT_DIR, NO_ANALYSIS, VL_MODEL, OBJECTIVE,
#            LM_STUDIO_URL, CONFIDENCE_THRESHOLD (all read)
# Arguments: $1 - zone name (used in the file name, prompt and logs)
#            $2 - 1-based iteration number (used in the file name)
# Outputs:   log lines via log_message; a JPEG written under OUTPUT_DIR
# Returns:   0 - frame captured (and analysed) without a qualifying target
#            1 - capture, encoding, request or response handling failed
#            2 - target detected with confidence >= CONFIDENCE_THRESHOLD
#######################################
capture_and_analyze() {
  local zone_name=$1 iteration=$2
  local image_file
  image_file="$OUTPUT_DIR/capture_${iteration}_${zone_name}_$(date -u +%Y%m%d_%H%M%S).jpg"

  if ! ffmpeg -f v4l2 -i "$DEVICE" -frames 1 -q:v 2 "$image_file" -y 2>/dev/null; then
    log_message "ERROR" "Failed to capture image"
    return 1
  fi
  log_message "DEBUG" "Image saved: $image_file"

  if [ "$NO_ANALYSIS" = true ]; then
    log_message "INFO" "Zone $zone_name captured"
    return 0
  fi

  log_message "DEBUG" "Analyzing with AI..."

  local base64_image
  if ! base64_image=$(base64 -w 0 "$image_file"); then
    log_message "ERROR" "Failed to encode image"
    return 1
  fi

  # Build the request with jq so that quotes, backslashes or newlines in the
  # objective, zone or model name are JSON-escaped instead of corrupting the
  # document. The (potentially large) image travels on stdin as one raw
  # string (-R -s) rather than on the command line.
  local payload
  if ! payload=$(
    printf '%s' "$base64_image" | jq -Rsc \
      --arg model "$VL_MODEL" \
      --arg zone "$zone_name" \
      --arg objective "$OBJECTIVE" \
      '{
        "model": $model,
        "messages": [
          {"role": "system", "content": "You are a visual analyst. Analyze images and provide structured observations."},
          {"role": "user", "content": [
            {"type": "text", "text": "Zone: \($zone). Objective: \($objective). Analyze this view."},
            {"type": "image_url", "image_url": {"url": ("data:image/jpeg;base64," + .)}}
          ]}
        ],
        "response_format": {
          "type": "json_schema",
          "json_schema": {
            "name": "analysis",
            "schema": {
              "type": "object",
              "properties": {
                "observation": {"type": "string"},
                "target_detected": {"type": "boolean"},
                "confidence": {"type": "number"},
                "notable_objects": {"type": "array", "items": {"type": "string"}}
              },
              "required": ["observation", "target_detected", "confidence"]
            }
          }
        },
        "max_tokens": 500
      }'
  ); then
    log_message "ERROR" "Failed to build request payload"
    return 1
  fi

  # The body is piped to curl, so no temporary file is left in /tmp.
  local response
  response=$(
    printf '%s' "$payload" \
      | curl -s -X POST "$LM_STUDIO_URL" \
        -H "Content-Type: application/json" \
        --data-binary @- 2>/dev/null
  )
  if [ -z "$response" ]; then
    log_message "ERROR" "No response from AI"
    return 1
  fi

  # The model's answer is itself a JSON document carried as a string in the
  # first choice; a missing or null content means the reply is unusable.
  local json_content
  json_content=$(printf '%s' "$response" \
    | jq -r '.choices[0].message.content // empty' 2>/dev/null)
  if [ -z "$json_content" ]; then
    log_message "ERROR" "Unexpected response from AI"
    return 1
  fi

  local target_detected confidence observation
  target_detected=$(printf '%s' "$json_content" | jq -r '.target_detected // false' 2>/dev/null)
  confidence=$(printf '%s' "$json_content" | jq -r '.confidence // 0' 2>/dev/null)
  observation=$(printf '%s' "$json_content" | jq -r '.observation // ""' 2>/dev/null)

  log_message "INFO" "Zone: $zone_name | Target: $target_detected | Confidence: $confidence"

  # Only hand a plain decimal number to bc; anything else (empty, text,
  # exponent notation) is treated as not meeting the threshold.
  local decimal_re='^-?[0-9]*\.?[0-9]+$'
  if [[ "$target_detected" == "true" && "$confidence" =~ $decimal_re ]] \
    && (( $(echo "$confidence >= $CONFIDENCE_THRESHOLD" | bc -l) )); then
    log_message "INFO" "TARGET FOUND in $zone_name: $observation"
    return 2
  fi
  return 0
}
#######################################
# Parse the command line, pick a scan pattern and visit its positions,
# capturing (and optionally analysing) a frame at each one.
# Globals:   written - PATTERN, VERBOSE, NO_ANALYSIS, CENTER_AFTER,
#                      OBJECTIVE, SEQUENCE, and any of MAX_ITERATIONS,
#                      CONFIDENCE_THRESHOLD, DELAY_BETWEEN_MOVES, OUTPUT_DIR,
#                      DEVICE overridden by options
#            read    - DEFAULT_SEQUENCE, GRID_SEQUENCE, QUICK_SEQUENCE
# Arguments: the script's command-line arguments
# Exits:     0 after --help; 1 on an unknown option, a missing option
#            value, a missing objective, an unknown pattern or a
#            non-numeric iteration limit
#######################################
main() {
  PATTERN="default"
  VERBOSE=false
  NO_ANALYSIS=false
  CENTER_AFTER=false

  while [[ $# -gt 0 ]]; do
    # Options that take a value must be followed by one; otherwise
    # "shift 2" would fail without consuming anything and the loop would
    # never terminate.
    case "$1" in
      -p | --pattern | -i | --iterations | -c | --confidence | -d | --delay | -o | --output | --device)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires an argument"
          show_usage
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -p | --pattern) PATTERN="$2"; shift 2 ;;
      -i | --iterations) MAX_ITERATIONS="$2"; shift 2 ;;
      -c | --confidence) CONFIDENCE_THRESHOLD="$2"; shift 2 ;;
      -d | --delay) DELAY_BETWEEN_MOVES="$2"; shift 2 ;;
      -o | --output) OUTPUT_DIR="$2"; shift 2 ;;
      -v | --verbose) VERBOSE=true; shift ;;
      --device) DEVICE="$2"; shift 2 ;;
      --no-analysis) NO_ANALYSIS=true; shift ;;
      --center) CENTER_AFTER=true; shift ;;
      -h | --help) show_usage; exit 0 ;;
      -*) echo "Unknown option: $1"; show_usage; exit 1 ;;
      *) OBJECTIVE="$1"; shift ;;
    esac
  done

  if [ -z "$OBJECTIVE" ]; then
    echo "Error: Search objective required"
    show_usage
    exit 1
  fi

  if ! [[ "$MAX_ITERATIONS" =~ ^[0-9]+$ ]]; then
    echo "Error: iterations must be a non-negative integer: $MAX_ITERATIONS"
    exit 1
  fi
  # Force base 10 so a value such as "08" is not read as invalid octal.
  local max_positions=$((10#$MAX_ITERATIONS))

  case "$PATTERN" in
    default) SEQUENCE=("${DEFAULT_SEQUENCE[@]}") ;;
    grid) SEQUENCE=("${GRID_SEQUENCE[@]}") ;;
    quick) SEQUENCE=("${QUICK_SEQUENCE[@]}") ;;
    *) echo "Unknown pattern: $PATTERN"; exit 1 ;;
  esac

  setup_environment
  log_message "INFO" "Starting scan: $OBJECTIVE"
  log_message "INFO" "Pattern: $PATTERN (${#SEQUENCE[@]} positions)"

  local targets_found=0 total=${#SEQUENCE[@]}
  local i pan tilt zoom zone_name rc
  for ((i = 0; i < total && i < max_positions; i++)); do
    IFS=',' read -r pan tilt zoom zone_name <<< "${SEQUENCE[i]}"
    log_message "INFO" "Position $((i + 1))/$total - $zone_name"

    # A failed move skips the capture for this position. A leading "+" on
    # pan/tilt is stripped before the values are passed on.
    execute_movement "${pan#+}" "${tilt#+}" "$zoom" "$zone_name" || continue

    # Call the function directly: wrapping it in $(...) would capture its
    # log output together with the status, hiding the logs and making the
    # comparison with 2 impossible.
    rc=0
    capture_and_analyze "$zone_name" "$((i + 1))" || rc=$?
    if ((rc == 2)); then
      targets_found=$((targets_found + 1))
    fi
  done

  if [ "$CENTER_AFTER" = true ]; then
    v4l2-ctl -d "$DEVICE" \
      --set-ctrl=pan_absolute=0,tilt_absolute=0,zoom_absolute=2 2>/dev/null
  fi
  log_message "INFO" "Scan complete. Targets found: $targets_found"
}
# Check dependencies before main runs, so a missing tool is reported up
# front. base64 is included because the analysis step encodes each captured
# frame with it.
for cmd in v4l2-ctl ffmpeg jq bc curl base64; do
  command -v "$cmd" &>/dev/null || { echo "Error: $cmd required"; exit 1; }
done
main "$@"