AI x Robotics Project: Gesture Recognition LED Control Using MediaPipe
Want to control LEDs using just your hand gestures — like turning ON a light with an open palm or switching it OFF with a fist?
In this tutorial, you'll learn how to use your webcam, Python, and an ESP32 (without internet!) to control LEDs with real-time hand gesture recognition using MediaPipe and socket communication.
🔧 What You’ll Need
Hardware
- ESP32 Development Board
- One LED
- Breadboard & jumper wires
- Your PC/Laptop with a webcam
Software
- Python 3
- OpenCV
- MediaPipe
- Arduino IDE
Project Overview
What’s happening behind the scenes:
- Your PC’s webcam captures hand gestures.
- Python + MediaPipe detects your hand shape (open palm or closed fist).
- Based on the gesture, it sends commands (`LED1_ON`, `LED1_OFF`) over Wi-Fi to the ESP32.
- The ESP32 receives the commands and turns the connected LED ON/OFF.
Bonus: No internet required! Your PC connects directly to ESP32’s WiFi hotspot.
Circuit Diagram
| GPIO Pin (ESP32) | Connected To |
|---|---|
| GPIO 16 | LED anode (+ve), via a ~220 Ω series resistor |
| GND | LED cathode (−ve) |
Step 1: Upload the Code to ESP32
Paste the following code into your Arduino IDE and upload it to your ESP32:
```cpp
#include <WiFi.h>

// WiFi credentials for the ESP32's own access point
const char* ssid = "ESP32-Gesture-Control";
const char* password = "12345678";

// TCP server on port 8080
WiFiServer server(8080);

String command;
const int output16 = 16;  // LED pin

void setup() {
  Serial.begin(115200);
  pinMode(output16, OUTPUT);
  digitalWrite(output16, LOW);

  // Start the ESP32 as a Wi-Fi access point (no router needed)
  WiFi.softAP(ssid, password);
  Serial.println("Access Point Started");
  Serial.println(WiFi.softAPIP());

  server.begin();
}

void loop() {
  WiFiClient client = server.available();
  if (client) {
    Serial.println("Client connected");
    while (client.connected()) {
      if (client.available()) {
        // Commands are newline-terminated strings
        command = client.readStringUntil('\n');
        command.trim();
        Serial.println("Received: " + command);
        if (command == "LED1_ON") {
          digitalWrite(output16, HIGH);
          client.println("LED1_ON_ACK");
        } else if (command == "LED1_OFF") {
          digitalWrite(output16, LOW);
          client.println("LED1_OFF_ACK");
        }
      }
    }
    client.stop();
    Serial.println("Client disconnected");
  }
}
```
After uploading:
Open the Serial Monitor (115200 baud) and you'll see:

```
Access Point Started
192.168.4.1
```

The IP may differ on your board; 192.168.4.1 is the ESP32's default soft-AP address.
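Before moving on, you can sanity-check the link with a few lines of Python (connect your PC to the `ESP32-Gesture-Control` network first). This mirrors what the full script does later; note that commands are newline-terminated, matching `readStringUntil('\n')` on the ESP32:

```python
import socket

# Minimal end-to-end test of the command protocol from Step 1
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(("192.168.4.1", 8080))      # soft-AP default IP, port from the sketch
sock.sendall(b"LED1_ON\n")               # commands are newline-terminated
print(sock.recv(1024).decode().strip())  # expect: LED1_ON_ACK
sock.close()
```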
Step 2: Install Python Dependencies
On your PC, install the required Python libraries:
`pip install opencv-python mediapipe`
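To confirm the installation worked, a quick import check:

```python
# Sanity check: both libraries import and report their versions
import cv2
import mediapipe as mp

print("OpenCV:", cv2.__version__)
print("MediaPipe:", mp.__version__)
```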
Step 3: Python Code to Detect Hand Gestures
Here's your complete Python script to control LEDs via webcam:
```python
import cv2
import mediapipe as mp
import socket
import time
import sys

# ESP32 connection settings
ESP32_IP = '192.168.4.1'  # Default IP when ESP32 is in Access Point mode
ESP32_PORT = 8080

# Initialize MediaPipe Hand Detection
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# Connect to ESP32
def connect_to_esp32():
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((ESP32_IP, ESP32_PORT))
        print(f"Connected to ESP32 at {ESP32_IP}:{ESP32_PORT}")
        return sock
    except socket.error as e:
        print(f"Failed to connect to ESP32: {e}")
        print("Make sure you're connected to the ESP32-Gesture-Control WiFi network")
        sys.exit(1)

# Send command to ESP32
def send_command(sock, command):
    try:
        sock.sendall(f"{command}\n".encode())
        response = sock.recv(1024).decode().strip()
        print(f"ESP32 response: {response}")
        return response
    except socket.error as e:
        print(f"Communication error: {e}")
        return None

# Detect if palm is open (fingers extended)
def is_palm_open(hand_landmarks):
    # Get fingertip and PIP (second joint) landmarks
    finger_tips = [
        hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP],
        hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP],
        hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP],
        hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]
    ]
    finger_pips = [
        hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_PIP],
        hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_PIP],
        hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_PIP],
        hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_PIP]
    ]

    # Check if fingertips are higher than PIPs (fingers extended)
    extended_fingers = 0
    for tip, pip in zip(finger_tips, finger_pips):
        if tip.y < pip.y:  # In image coordinates, y increases downward
            extended_fingers += 1

    # Consider palm open if at least 3 fingers are extended
    return extended_fingers >= 3

# Detect if hand is making a fist
def is_fist(hand_landmarks):
    # Get fingertip and MCP (base joint) landmarks
    finger_tips = [
        hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP],
        hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_TIP],
        hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_TIP],
        hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_TIP]
    ]
    finger_mcps = [
        hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP],
        hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_MCP],
        hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_MCP],
        hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP]
    ]

    # For a fist, fingertips should be close to/below the MCPs
    folded_fingers = 0
    for tip, mcp in zip(finger_tips, finger_mcps):
        if tip.y > mcp.y - 0.05:  # Finger is folded if tip is below or close to MCP
            folded_fingers += 1

    # Consider it a fist if at least 3 fingers are folded
    return folded_fingers >= 3

def main():
    print("Connecting to ESP32...")
    sock = connect_to_esp32()

    print("Starting hand gesture detection...")
    # Open webcam
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open webcam")
        return

    # For tracking state changes
    led1_state = False
    led2_state = False
    last_gesture_time = time.time()
    debounce_time = 1.0  # 1 second between gestures to avoid rapid switching

    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Failed to capture image from webcam")
            break

        # Flip the image horizontally for a more intuitive mirror view
        image = cv2.flip(image, 1)
        # Convert the BGR image to RGB
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Process the image and detect hands
        results = hands.process(image_rgb)

        # Draw hand landmarks on the image
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Check for gestures with debounce to avoid rapid toggling
                current_time = time.time()
                if current_time - last_gesture_time > debounce_time:
                    # Check if palm is open (to turn ON LED1)
                    if is_palm_open(hand_landmarks) and not led1_state:
                        send_command(sock, "LED1_ON")
                        led1_state = True
                        last_gesture_time = current_time
                        cv2.putText(image, "Command: LED1 ON", (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                    # Check if hand is a fist (to turn OFF LED1)
                    elif is_fist(hand_landmarks) and led1_state:
                        send_command(sock, "LED1_OFF")
                        led1_state = False
                        last_gesture_time = current_time
                        cv2.putText(image, "Command: LED1 OFF", (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                # You can add more gestures here for LED2
                # For example, detect if the hand is tilted right for LED2_ON
                # and tilted left for LED2_OFF

        # Display status
        led1_color = (0, 255, 0) if led1_state else (0, 0, 255)
        led2_color = (0, 255, 0) if led2_state else (0, 0, 255)
        cv2.putText(image, f"LED1: {'ON' if led1_state else 'OFF'}", (10, 70),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, led1_color, 2)
        cv2.putText(image, f"LED2: {'ON' if led2_state else 'OFF'}", (10, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, led2_color, 2)

        # Display instructions
        cv2.putText(image, "Open Palm = LED1 ON", (10, image.shape[0] - 70),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        cv2.putText(image, "Closed Fist = LED1 OFF", (10, image.shape[0] - 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        # Show the image
        cv2.imshow('Hand Gesture Detection', image)

        # Exit on 'q' press
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

    # Clean up
    cap.release()
    cv2.destroyAllWindows()
    sock.close()
    print("Program terminated")

if __name__ == "__main__":
    main()
```
📌 Save this as `gesture_control.py`
It performs:
- Open Palm → LED1 ON
- Fist → LED1 OFF
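The trick behind both checks: MediaPipe returns 21 hand landmarks in normalized image coordinates where y increases downward, so an extended finger is simply one whose tip has a smaller y than the PIP joint below it. A toy illustration with made-up values (not real MediaPipe output):

```python
# Toy illustration of the open-palm rule (dummy values, not MediaPipe output).
# In normalized image coordinates, y grows downward, so a raised fingertip
# has a SMALLER y than the joint beneath it.
class FakeLandmark:
    def __init__(self, y):
        self.y = y

tip = FakeLandmark(0.30)  # fingertip near the top of the frame
pip = FakeLandmark(0.45)  # PIP joint lower in the frame

print(tip.y < pip.y)  # True -> this finger counts as "extended"
```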
Make sure you're connected to the ESP32's WiFi network:
- SSID: `ESP32-Gesture-Control`
- Password: `12345678`
Then, run:
`python gesture_control.py`
You’ll see the webcam feed with hand landmarks and LED status printed.
How It Looks
- Open palm → `Command: LED1 ON`
- Closed fist → `Command: LED1 OFF`
Watch the LED on your breadboard respond instantly to your hand!
Customize It Further!
Add more gestures, for example:
- Thumbs up → turn ON LED2 (a starter sketch follows this list)
- Hand tilt → control motors
- Peace sign → trigger a buzzer
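Here's a possible starting point for the thumbs-up check, written in the same style as `is_palm_open()` and `is_fist()`. This is an untested sketch, not part of the original script: the 0.02 margin and the three-finger threshold are assumptions to tune for your own camera and hand.

```python
import mediapipe as mp

mp_hands = mp.solutions.hands  # same alias as in gesture_control.py

# Hypothetical thumbs-up detector: thumb extended upward, other fingers folded
def is_thumbs_up(hand_landmarks):
    lm = hand_landmarks.landmark

    # Thumb pointing up: tip clearly above its IP joint (y grows downward)
    thumb_up = (lm[mp_hands.HandLandmark.THUMB_TIP].y
                < lm[mp_hands.HandLandmark.THUMB_IP].y - 0.02)

    # Count folded fingers: each tip at or below its PIP joint
    pairs = [
        (mp_hands.HandLandmark.INDEX_FINGER_TIP, mp_hands.HandLandmark.INDEX_FINGER_PIP),
        (mp_hands.HandLandmark.MIDDLE_FINGER_TIP, mp_hands.HandLandmark.MIDDLE_FINGER_PIP),
        (mp_hands.HandLandmark.RING_FINGER_TIP, mp_hands.HandLandmark.RING_FINGER_PIP),
        (mp_hands.HandLandmark.PINKY_TIP, mp_hands.HandLandmark.PINKY_PIP),
    ]
    folded = sum(1 for tip, pip in pairs if lm[tip].y > lm[pip].y)

    return thumb_up and folded >= 3
```

You would call it in the main loop next to the other checks, send a new command such as `LED2_ON`, and add a matching branch (and a second GPIO pin) to the ESP32 sketch.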
How It Works — Simplified
| Component | Role |
|---|---|
| MediaPipe | Detects and tracks hand landmarks |
| OpenCV | Captures and displays the webcam feed |
| Python socket | Sends commands to the ESP32 |
| ESP32 TCP server | Receives commands and controls GPIO pins |
Final Thoughts
This project is a fun way to explore computer vision, IoT, and wireless control — no internet, no Bluetooth, just pure tech magic.
Want to make it more advanced? Add gesture-based car control, light dimming, or even appliance control. The possibilities are endless.
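For instance, dimming could map the pinch distance between thumb and index fingertips to a PWM duty cycle. The sketch below is purely illustrative: the `LED1_DIM:<value>` command is hypothetical, and the ESP32 firmware would need matching changes (PWM via `ledcWrite()` instead of a plain `digitalWrite()`).

```python
import math

import mediapipe as mp

mp_hands = mp.solutions.hands  # same alias as in gesture_control.py

# Hypothetical: map thumb-index pinch distance to a 0-255 brightness value
def pinch_brightness(hand_landmarks):
    lm = hand_landmarks.landmark
    thumb = lm[mp_hands.HandLandmark.THUMB_TIP]
    index = lm[mp_hands.HandLandmark.INDEX_FINGER_TIP]

    # Distance in normalized image coordinates (roughly 0.0 to ~0.4)
    dist = math.hypot(thumb.x - index.x, thumb.y - index.y)

    # Scale and clamp to a PWM duty cycle; 0.3 is an assumed full-open distance
    return max(0, min(255, int(dist / 0.3 * 255)))

# In the main loop you might then send:
#   send_command(sock, f"LED1_DIM:{pinch_brightness(hand_landmarks)}")
```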
If you’d like a complete kit to build this, reach out to us at Techsage – we make learning tech fun and practical!