(*
	Batch-OCR selected PDFs in DEVONthink using PDF Expert

	Originally by Alex Wellerstein – adapted by ChatGPT, May 2025
	Released to the public domain: absolutely no warranties, guarantees,
	promises, *anything*.

	Tested on macOS 14.6.1 • DEVONthink 3.9.x • PDF Expert 3.10.10

	Overview:
	  1. Collect the file paths of the PDF records selected in DEVONthink.
	  2. Set up the script progress bar.
	  3. For each path, drive PDF Expert through System Events (GUI scripting)
	     to run "Recognize Text", save, and close the window; then ask
	     DEVONthink to refresh the matching record.

	GUI scripting through System Events normally requires the host application
	to be granted Accessibility permission.
*)

------------------------------------------------------------
-- 1. COLLECT THE SELECTION FROM DEVONTHINK
------------------------------------------------------------
tell application id "DNtp" -- DEVONthink's four-letter bundle ID
	activate
	set theSelection to selection
end tell

if theSelection is {} then
	-- The only button is "Cancel", so the dialog itself raises error -128
	-- (user cancelled) and stops the script; the `return` is a safety net.
	display dialog "Nothing is selected in DEVONthink." buttons {"Cancel"} default button 1 with icon caution
	return
end if

-- Build a simple AppleScript list of POSIX paths for any *PDF* items.
set theFiles to {}
repeat with rec in theSelection
	try
		tell application id "DNtp"
			if type of rec is PDF document then -- skip anything that isn't a PDF
				set end of theFiles to POSIX path of (path of rec as string)
			end if
		end tell
	end try -- best effort: a record that cannot be inspected is skipped
end repeat

if theFiles is {} then
	display dialog "None of the selected items is a PDF." buttons {"Cancel"} default button 1 with icon caution
	return
end if

------------------------------------------------------------
-- 2. SET UP A PROGRESS BAR
------------------------------------------------------------
set totalFiles to length of theFiles
set progress total steps to totalFiles
set progress completed steps to 0
set progress description to "Processing PDFs with PDF Expert…"
set progress additional description to "Starting…"

------------------------------------------------------------
-- 3. LOOP THROUGH EACH PDF AND RUN THE OCR ROUTINE
------------------------------------------------------------
repeat with i from 1 to totalFiles
	set thePath to item i of theFiles

	-- Only (i - 1) files are finished while file i is being processed.
	set progress completed steps to (i - 1)
	set progress additional description to "Processing " & i & " of " & totalFiles & "…"

	my pdfexpert_ocr(thePath)
	set progress completed steps to i

	-- Ask DEVONthink to refresh the record so its thumbnails & metadata update.
	-- NOTE(review): confirm `update record … without asking` against the
	-- DEVONthink scripting dictionary; any failure here is silently ignored
	-- by the surrounding try block.
	try
		tell application id "DNtp"
			set recList to lookup records with path thePath -- a list (usually length 1)
			repeat with r in recList
				update record r without asking
			end repeat
		end tell
	end try

	-- Quit PDF Expert every ten files (intended to free up RAM); it is
	-- relaunched by the next call to pdfexpert_ocr.
	if i mod 10 is 0 then
		tell application "PDF Expert" to quit
		delay 1
	end if
end repeat

log "All done!"
display dialog "OCR finished for " & totalFiles & " PDF(s)." buttons {"OK"} default button 1

--------------------------------------------------------------------------------
-- HANDLERS
--------------------------------------------------------------------------------

-- Open one PDF in PDF Expert, run "Recognize Text" on it via GUI scripting,
-- wait for the progress sheet to go away, then save and close the window.
--
-- filePath: POSIX path of the PDF to process.
-- Errors:   -128 (user cancelled) if no PDF Expert window with the file's
--           name can be found; a descriptive error if an expected UI element
--           (button / menu item) is missing.
on pdfexpert_ocr(filePath)
	log ("Starting OCR for " & filePath)

	-- Open explicitly with PDF Expert so the script does not depend on it
	-- being the default PDF viewer.
	do shell script "open -a " & quoted form of "PDF Expert" & " " & quoted form of filePath
	delay 0.5
	activate application "PDF Expert"
	delay 0.5

	-- The window is matched by title, which is the file name minus ".pdf".
	set fileTitle to getFilename(filePath, ".pdf")

	tell application "System Events"
		tell process "PDF Expert"
			-- Locate the window that matches the file name.
			set win to false
			repeat with w in every window
				if (title of w) is fileTitle then set win to w
			end repeat

			if (win is false) then
				delay 2 -- give PDF Expert a moment more
				repeat with w in every window
					if (title of w) is fileTitle then set win to w
				end repeat
			end if

			if (win is false) then
				display dialog "Could not find window for “" & fileTitle & "” in PDF Expert." buttons {"Cancel"} default button 1 with icon stop
				error number -128
			end if

			-- Click "Recognize Text" in Scan & OCR.
			click my requireElement("Recognize Text", entire contents of win, "window “" & fileTitle & "”")

			-- Click "Recognize..." in the side bar.
			tell my requireElement("Recognize...", entire contents of win, "window “" & fileTitle & "”")
				click
				-- Choose "All" pages when the dialog appears (if >1 page).
				if exists radio button "All" of sheet 1 of win then
					click radio button "All" of sheet 1 of win
					click button "Apply" of sheet 1 of win
				end if
			end tell

			delay 1 -- short head-start before polling

			-- Poll until the progress sheet disappears.
			repeat
				try
					if not (exists win) or (count sheet of win) = 0 then exit repeat
					if not (exists progress indicator 1 of sheet 1 of win) then exit repeat
				on error
					exit repeat -- any UI error = assume finished
				end try
				delay 0.5 -- avoid flooding System Events with Apple events
			end repeat

			delay 1 -- extra cushion

			-- Save & close.
			set fileMenu to menu "File" of menu bar item "File" of menu bar 1
			click my requireElement("Save", entire contents of fileMenu, "the File menu")
			delay 1

			if my isSaved(front window) then
				click my requireElement("Close Window", entire contents of fileMenu, "the File menu")
			else
				log "WARNING: file might not have saved!"
			end if
		end tell
	end tell
end pdfexpert_ocr

-- Look up a UI element by name and fail loudly when it is absent, instead of
-- handing the boolean `false` to `click`.
--
-- targetName:     name of the UI element to find.
-- targetElements: list of UI element references to search.
-- whereLabel:     human-readable description of the searched container,
--                 used only in the error message.
-- Returns the matching element; raises an error if none matches.
on requireElement(targetName, targetElements, whereLabel)
	set found to getElementByClassAndName(targetName, false, targetElements)
	if found is false then
		error "Could not find UI element “" & targetName & "” in " & whereLabel & " of PDF Expert."
	end if
	return found
end requireElement

-- Return the Finder name of the file at a POSIX path, with every trailing
-- occurrence of theExtension removed (pass "" to keep the name untouched).
on getFilename(theFile, theExtension)
	tell application "Finder" to set fName to name of (POSIX file theFile as alias)
	if (theExtension is not "") then
		return trimText(fName, theExtension, "end")
	else
		return fName
	end if
end getFilename

-- Strip repeated occurrences of theCharactersToTrim from theText.
--
-- theTrimDirection: "beginning", "end", or "both".
-- Returns the trimmed text, or "" when nothing is left after trimming.
on trimText(theText, theCharactersToTrim, theTrimDirection)
	set theTrimLength to length of theCharactersToTrim

	-- Nothing to trim; also guards the loops below against never terminating.
	if theTrimLength is 0 then return theText

	if theTrimDirection is in {"beginning", "both"} then
		repeat while theText begins with theCharactersToTrim
			try
				-- `text … thru …` yields a string directly and, unlike
				-- `characters … as string`, ignores text item delimiters.
				set theText to text (theTrimLength + 1) thru -1 of theText
			on error
				return "" -- the whole text was consumed
			end try
		end repeat
	end if

	if theTrimDirection is in {"end", "both"} then
		repeat while theText ends with theCharactersToTrim
			try
				set theText to text 1 thru -(theTrimLength + 1) of theText
			on error
				return "" -- the whole text was consumed
			end try
		end repeat
	end if

	return theText
end trimText

-- Return the first element of targetElements whose name is targetName and,
-- unless targetClass is false, whose class is targetClass.
-- Returns false when no element matches. Elements whose name or class cannot
-- be read are skipped.
on getElementByClassAndName(targetName, targetClass, targetElements)
	repeat with uiElem in targetElements
		try
			if name of uiElem is targetName then
				if targetClass is false then return uiElem
				if class of uiElem is targetClass then return uiElem
			end if
		end try
	end repeat
	return false
end getElementByClassAndName

-- Report whether a window looks saved: returns false when its name begins
-- with "*", true otherwise (including when the name cannot be read).
on isSaved(win)
	try
		set winName to name of win
		if winName begins with "*" then return false
	end try
	return true
end isSaved