This page provides practical code samples for common UI automation tasks using the uiautomation.py module. These examples demonstrate how to use various features of the ScreenInteraction, BitmapInteraction, and WindowInteraction classes.
Table of Contents
Screen Interaction
Finding and Clicking on UI Elements
import uiautomation as ui
import time
# Find and click on UI elements
ui.screen.find_text_on_screen("Interact with UI")
ui.screen.find_text_on_screen("Claude X")
# Find a specific number on a calculator
ui.screen.find_str_on_screen("9", size="medium", border=20)
# Click on a button if it exists
if ui.bitmap.click("new_token.png"):
    print("Clicked on 'New Token' button")
Typing and Pressing Keys
# Simulate typing
ui.screen.simulate_typing("Hello, world!")
# Press a specific key
ui.screen.press_key('f5')
# Generate and type a random number
random_number = ui.generate_random_number_string()
ui.screen.simulate_typing("Random key " + random_number)
Scrolling
# Scroll down
ui.screen.scroll_mouse('down', 500, duration=2)
# Scroll up
ui.screen.scroll_mouse('up', 300, duration=1.5)
Window Manipulation
Working with Windows
# Bring a specific window to the front
ui.window.bring_window_to_front("Notepad")
# Resize the active window
ui.window.resize_active_window(800, 600)
# Maximize the active window
ui.window.maximize_active_window()
# Move the active window
ui.window.move_active_window_top_right()
time.sleep(1)
ui.window.move_active_window_bottom_left()
Interacting with Calculator
from pywinauto import Desktop, Application
# Connect to the Calculator window (assuming it's already open)
app = Application(backend="uia").connect(title_re=".*Calculator.*")
calc_window = app.window(title_re=".*Calculator.*")
# Bring the Calculator window to the foreground
calc_window.set_focus()
time.sleep(1) # Allow some time for the window to activate
# Perform a calculation
ui.window.click_ui_button("Clear", calc_window)
ui.window.click_ui_button("Nine", calc_window)
ui.window.click_ui_button("One", calc_window)
ui.window.click_ui_button("Multiply by", calc_window)
ui.window.click_ui_button("Two", calc_window)
ui.window.click_ui_button("Zero", calc_window)
ui.window.click_ui_button("Equals", calc_window)
Browser Interaction
Working with Browser Tabs
# Get the current browser tab title
current_tab = ui.window.get_current_browser_tab_title()
# Switch to a specific tab
target_tab = "Directory Listing"
main_window = ui.window.get_browser_window(target_tab, to_print=False)
# Interact with elements in the browser
if ui.window.browser_item_found(main_window, auto_id="id_new_file_name", control_type="Edit"):
    ui.window.browser_item_coordinates(main_window, auto_id="id_new_file_name", control_type="Edit")
    ui.window.browser_update_edit_field(main_window, auto_id="id_new_file_name", text="test.txt")
# ui.window.browser_press_button(main_window, auto_id="id_new_file_button", title="New empty file")
# Switch back to the original tab
ui.window.select_browser_tab(current_tab, main_window)
Combined Scenarios
VPN Connection Checker
# Check VPN connection and connect if not connected
if not ui.screen.is_there("connected_vpn.png"):
if ui.bitmap.click("disconnected_vpn.png"):
ui.bitmap.click("connect_vpn.png")
if ui.bitmap.wait_for("connected.png"):
ui.window.minimize_active_window()
time.sleep(5)
Token Generator and Clipboard Saver
# Generate a new token and save it to a file
if not ui.screen.is_there("active_token.png"):
    print("No active token found. Generating a new one.")
    ui.bitmap.click("new_token.png")
    ui.bitmap.click("name_field.png")
    ui.screen.simulate_typing("Random key " + ui.generate_random_number_string())
    ui.bitmap.click("readwrite_flag.png")
    ui.bitmap.click("create_btn.png")
    ui.bitmap.click("copy_icon.png")
    # Save the copied token to a file
    file_path = r"C:\Users\username\Documents\token.txt"
    ui.window.save_clipboard_text_to_file(file_path)
    ui.bitmap.click("close_btn.png")
    ui.screen.press_key('f5') # Refresh the page
else:
    print("Active token found.")
OCR Text Finder
# Capture the screen and perform OCR
screen = ui.screen.capture_screen()
ocr_result = ui.screen.perform_ocr(screen)
# Print all text found on the screen with high confidence
for i in range(len(ocr_result['text'])):
    if int(ocr_result['conf'][i]) > 60: # confidence threshold
        print(f"Text: {ocr_result['text'][i]}")
        print(f"Coordinates: (x={ocr_result['left'][i]}, y={ocr_result['top'][i]})")
        print(f"Size: {ocr_result['width'][i]}x{ocr_result['height'][i]}")
        print("---")
These code samples demonstrate a wide range of UI automation tasks that can be performed using the uiautomation.py module. Users can adapt and combine these examples to create more complex automation scripts tailored to their specific needs.