UI Automation Code Samples

This page provides practical code samples for common UI automation tasks using the uiautomation.py module. These examples demonstrate how to use various features of the ScreenInteraction, BitmapInteraction, and WindowInteraction classes.

Table of Contents

  1. Screen Interaction
  2. Window Manipulation
  3. Browser Interaction
  4. Combined Scenarios

Screen Interaction

Finding and Clicking on UI Elements

import time

import uiautomation as ui

# Find and click on UI elements.
# NOTE(review): the calls below only show a text lookup; whether a match is
# also clicked is decided inside find_text_on_screen — confirm in uiautomation.py.
ui.screen.find_text_on_screen("Interact with UI")
ui.screen.find_text_on_screen("Claude X")

# Find a specific number on a calculator.
# NOTE(review): `size` and `border` presumably tune how the string is matched
# on screen — confirm their exact meaning against find_str_on_screen.
ui.screen.find_str_on_screen("9", size="medium", border=20)

# Click on a button if it exists: click()'s return value is used as a success
# flag, so the message is printed only when it is truthy.
if ui.bitmap.click("new_token.png"):
    print("Clicked on 'New Token' button")

Typing and Pressing Keys

# Assumes `import uiautomation as ui` from the first sample on this page.

# Simulate typing
ui.screen.simulate_typing("Hello, world!")

# Press a specific key
ui.screen.press_key('f5')

# Generate and type a random number
random_number = ui.generate_random_number_string()
ui.screen.simulate_typing(f"Random key {random_number}")

Scrolling

# Assumes `import uiautomation as ui` from the first sample on this page.
# NOTE(review): the second argument is the scroll amount and `duration` is
# presumably in seconds — confirm both units against scroll_mouse.

# Scroll down
ui.screen.scroll_mouse('down', 500, duration=2)

# Scroll up
ui.screen.scroll_mouse('up', 300, duration=1.5)

Window Manipulation

Working with Windows

# Assumes `import uiautomation as ui` and `import time` from the first sample
# on this page.

# Bring a specific window to the front
ui.window.bring_window_to_front("Notepad")

# Resize the active window
# NOTE(review): arguments are presumably width, height in pixels — confirm.
ui.window.resize_active_window(800, 600)

# Maximize the active window
ui.window.maximize_active_window()

# Move the active window; the one-second pause separates the two moves.
ui.window.move_active_window_top_right()
time.sleep(1)
ui.window.move_active_window_bottom_left()

Interacting with Calculator

from pywinauto import Desktop, Application

# Assumes `import uiautomation as ui` and `import time` from the first sample
# on this page.

# Connect to the Calculator window (assuming it's already open)
app = Application(backend="uia").connect(title_re=".*Calculator.*")
calc_window = app.window(title_re=".*Calculator.*")

# Bring the Calculator window to the foreground
calc_window.set_focus()
time.sleep(1)  # Allow some time for the window to activate

# Perform a calculation: clear, then key in 91 x 20 and press equals.
# The strings are the button names passed to click_ui_button, pressed in order.
button_sequence = ("Clear", "Nine", "One", "Multiply by", "Two", "Zero", "Equals")
for button_name in button_sequence:
    ui.window.click_ui_button(button_name, calc_window)

Browser Interaction

Working with Browser Tabs

# Assumes `import uiautomation as ui` from the first sample on this page.

# Get the current browser tab title so it can be restored at the end
current_tab = ui.window.get_current_browser_tab_title()

# Switch to a specific tab
# NOTE(review): presumably get_browser_window also activates the tab whose
# title matches target_tab — confirm against uiautomation.py.
target_tab = "Directory Listing"
main_window = ui.window.get_browser_window(target_tab, to_print=False)

# Interact with elements in the browser, but only if the edit field is present
if ui.window.browser_item_found(main_window, auto_id="id_new_file_name", control_type="Edit"):
    # The return value is not used here; capture it if the coordinates are needed.
    ui.window.browser_item_coordinates(main_window, auto_id="id_new_file_name", control_type="Edit")
    ui.window.browser_update_edit_field(main_window, auto_id="id_new_file_name", text="test.txt")
    # Optional follow-up step, left disabled in this sample:
    # ui.window.browser_press_button(main_window, auto_id="id_new_file_button", title="New empty file")

# Switch back to the original tab
ui.window.select_browser_tab(current_tab, main_window)

Combined Scenarios

VPN Connection Checker

# Assumes `import uiautomation as ui` and `import time` from the first sample
# on this page.

# Check VPN connection and connect if not connected.
# NOTE(review): the initial check looks for "connected_vpn.png" while the wait
# below uses "connected.png" — confirm that both bitmaps exist and are intended.
if not ui.screen.is_there("connected_vpn.png"):
    if ui.bitmap.click("disconnected_vpn.png"):
        ui.bitmap.click("connect_vpn.png")
        # Minimize only once the connected indicator has been seen.
        if ui.bitmap.wait_for("connected.png"):
            ui.window.minimize_active_window()
# This pause is outside the `if`, so it runs whether or not a connection
# attempt was made.
time.sleep(5)

Token Generator and Clipboard Saver

# Assumes `import uiautomation as ui` from the first sample on this page.

# Generate a new token and save it to a file
if not ui.screen.is_there("active_token.png"):
    print("No active token found. Generating a new one.")
    # The click results are not checked in this sample; a missing bitmap does
    # not stop the sequence.
    ui.bitmap.click("new_token.png")
    ui.bitmap.click("name_field.png")
    ui.screen.simulate_typing(f"Random key {ui.generate_random_number_string()}")
    ui.bitmap.click("readwrite_flag.png")
    ui.bitmap.click("create_btn.png")
    ui.bitmap.click("copy_icon.png")

    # Save the copied token to a file ("username" is a placeholder path segment)
    file_path = r"C:\Users\username\Documents\token.txt"
    ui.window.save_clipboard_text_to_file(file_path)

    ui.bitmap.click("close_btn.png")
    ui.screen.press_key('f5')  # Refresh the page
else:
    print("Active token found.")

OCR Text Finder

# Assumes `import uiautomation as ui` from the first sample on this page.

# Capture the screen and perform OCR
screen = ui.screen.capture_screen()
ocr_result = ui.screen.perform_ocr(screen)

MIN_CONFIDENCE = 60  # confidence threshold

# ocr_result is read as a dict of parallel lists, one entry per detected item.
# NOTE(review): the keys match pytesseract's image_to_data dict output —
# confirm that perform_ocr returns that structure.
detections = zip(
    ocr_result['text'],
    ocr_result['conf'],
    ocr_result['left'],
    ocr_result['top'],
    ocr_result['width'],
    ocr_result['height'],
)

# Print all text found on the screen with high confidence
for text, conf, left, top, width, height in detections:
    # float() instead of int(): a fractional confidence given as a string
    # (e.g. "96.5") would make int() raise ValueError.
    if float(conf) > MIN_CONFIDENCE:
        print(f"Text: {text}")
        print(f"Coordinates: (x={left}, y={top})")
        print(f"Size: {width}x{height}")
        print("---")

These code samples demonstrate a wide range of UI automation tasks that can be performed using the uiautomation.py module. Users can adapt and combine these examples to create more complex automation scripts tailored to their specific needs.