(*
Batch-OCR selected PDFs in DEVONthink using PDF Expert
Originally by Alex Wellerstein – adapted by ChatGPT, May 2025
Released to the public domain: absolutely no warranties, guarantees, promises, *anything*.
Tested on macOS 14.6.1 • DEVONthink 3.9.x • PDF Expert 3.10.10
*)

------------------------------------------------------------
-- 1. COLLECT THE SELECTION FROM DEVONTHINK
------------------------------------------------------------
tell application id "DNtp" -- DEVONthink’s four-letter bundle ID
    activate
    set theSelection to selection
end tell

if theSelection is {} then
    display dialog "Nothing is selected in DEVONthink." buttons {"Cancel"} default button 1 with icon caution
    return
end if

-- Build a simple AppleScript list of POSIX paths for any *PDF* items.
-- Each record is handled inside its own try block so that one problematic
-- record is skipped instead of aborting the whole batch.
set theFiles to {}
repeat with rec in theSelection
    try
        tell application id "DNtp"
            if type of rec is PDF document then -- skip anything that isn’t a PDF
                set recPath to (path of rec as string)
                -- Skip records that report no file path; there is nothing to open for them.
                if recPath is not "" then set end of theFiles to POSIX path of recPath
            end if
        end tell
    end try
end repeat

if theFiles is {} then
    display dialog "None of the selected items is a PDF." buttons {"Cancel"} default button 1 with icon caution
    return
end if

------------------------------------------------------------
-- 2. SET UP A PROGRESS BAR
------------------------------------------------------------
set totalFiles to length of theFiles
set progress total steps to totalFiles
set progress completed steps to 0
set progress description to "Processing PDFs with PDF Expert…"
set progress additional description to "Starting…"

------------------------------------------------------------
-- 3. LOOP THROUGH EACH PDF AND RUN THE OCR ROUTINE
------------------------------------------------------------
repeat with i from 1 to totalFiles
    set thePath to item i of theFiles
    -- Only the files already finished count as completed while file i is in progress.
    set progress completed steps to i - 1
    set progress additional description to "Processing " & i & " of " & totalFiles & "…"

    my pdfexpert_ocr(thePath)
    set progress completed steps to i

    -- Refresh the record inside DEVONthink so its thumbnails & metadata update.
    -- Best effort: a failure here must not stop the batch.
    try
        tell application id "DNtp"
            set recList to lookup records with path thePath -- returns a list (usually length 1)
            repeat with r in recList
                update record r without asking -- silent refresh
            end repeat
        end tell
    end try

    -- Free up RAM every ten files by quitting PDF Expert; the next
    -- pdfexpert_ocr call relaunches it when it opens the next file.
    if i mod 10 is 0 then
        tell application "PDF Expert" to quit
        delay 1
    end if
end repeat

log "All done!"
display dialog "OCR finished for " & totalFiles & " PDF(s)." buttons {"OK"} default button 1

--------------------------------------------------------------------------------
-- HANDLERS
--------------------------------------------------------------------------------

-- Opens one PDF in PDF Expert, runs "Recognize Text" on it through UI
-- scripting (System Events), waits for the progress sheet to go away, then
-- saves and closes the window.
--   filePath : POSIX path of the PDF to process.
-- Raises error -128 (after showing a dialog) when no PDF Expert window whose
-- title equals the file name without ".pdf" can be found, and a descriptive
-- error when an expected control or menu item is missing.
on pdfexpert_ocr(filePath)
    log ("Starting OCR for " & filePath)
    -- Ask for PDF Expert explicitly so this works even when another
    -- application is the default PDF viewer.
    do shell script "open -a " & quoted form of "PDF Expert" & " " & quoted form of filePath
    delay 0.5
    activate application "PDF Expert"
    delay 0.5
    set fileTitle to getFilename(filePath, ".pdf")
    tell application "System Events"
        tell process "PDF Expert"

            -- Locate the window that matches the file name. The document may
            -- take a moment to open, so retry for roughly five seconds.
            -- NOTE(review): win keeps the loop variable's reference to the
            -- matching window, as in the original; confirm it stays valid if
            -- PDF Expert reorders its windows during processing.
            set win to false
            repeat 10 times
                repeat with w in every window
                    if (title of w) is fileTitle then set win to w
                end repeat
                if (win is not false) then exit repeat
                delay 0.5
            end repeat

            if (win is false) then
                display dialog "Could not find window for “" & fileTitle & "” in PDF Expert." buttons {"Cancel"} default button 1 with icon stop
                error number -128
            end if

            -- click “Recognize Text” in Scan & OCR
            click my requireElement("Recognize Text", entire contents of win)

            -- click “Recognize…” in the side bar
            click my requireElement("Recognize...", entire contents of win)

            -- Give the page-range sheet a moment to appear before probing for it.
            delay 0.5

            -- choose “All” pages when the dialog appears (if >1 page)
            if exists radio button "All" of sheet 1 of win then
                click radio button "All" of sheet 1 of win
                click button "Apply" of sheet 1 of win
            end if

            delay 1 -- short head-start before polling

            repeat -- poll until the progress sheet disappears
                try
                    if not (exists win) or (count sheet of win) = 0 then exit repeat
                    if not (exists progress indicator 1 of sheet 1 of win) then exit repeat
                on error
                    exit repeat -- any UI error = assume finished
                end try
                delay 0.5 -- pause between polls instead of busy-waiting on System Events
            end repeat

            delay 1 -- extra cushion

            -- save & close
            set fileMenu to menu "File" of menu bar item "File" of menu bar 1
            click my requireElement("Save", entire contents of fileMenu)
            delay 1
            if my isSaved(front window) then
                click my requireElement("Close Window", entire contents of fileMenu)
            else
                log "WARNING: file might not have saved!"
            end if
        end tell
    end tell
end pdfexpert_ocr

-- Returns the first element of targetElements whose name is targetName, or
-- raises a descriptive error when there is none, so a missing control is
-- reported clearly instead of failing inside a later click.
on requireElement(targetName, targetElements)
    set foundElement to getElementByClassAndName(targetName, false, targetElements)
    if foundElement is false then
        error "Could not find the “" & targetName & "” control in PDF Expert."
    end if
    return foundElement
end requireElement

-- Returns the Finder name of the file at POSIX path theFile. When
-- theExtension is not "", trailing occurrences of it are trimmed from the name.
on getFilename(theFile, theExtension)
    tell application "Finder" to set fName to name of (POSIX file theFile as alias)
    if (theExtension is not "") then
        return trimText(fName, theExtension, "end")
    else
        return fName
    end if
end getFilename

-- Removes every leading and/or trailing repetition of theCharactersToTrim
-- from theText.
--   theTrimDirection : "beginning", "end" or "both"; any other value leaves
--                      theText unchanged.
-- Returns "" when nothing is left after trimming.
on trimText(theText, theCharactersToTrim, theTrimDirection)
    set theTrimLength to length of theCharactersToTrim
    -- Nothing to trim; also keeps the loops below from running on an empty pattern.
    if theTrimLength is 0 then return theText
    if theTrimDirection is in {"beginning", "both"} then
        repeat while theText begins with theCharactersToTrim
            try
                -- "text m thru n" yields a substring directly; coercing a list of
                -- characters to string would depend on the current text item delimiters.
                set theText to text (theTrimLength + 1) thru -1 of theText
            on error
                return "" -- the whole text was consumed
            end try
        end repeat
    end if
    if theTrimDirection is in {"end", "both"} then
        repeat while theText ends with theCharactersToTrim
            try
                set theText to text 1 thru -(theTrimLength + 1) of theText
            on error
                return "" -- the whole text was consumed
            end try
        end repeat
    end if
    return theText
end trimText

-- Scans targetElements and returns the first element whose name is
-- targetName and, unless targetClass is false, whose class is targetClass.
-- Elements that raise an error when queried are skipped.
-- Returns false when nothing matches.
on getElementByClassAndName(targetName, targetClass, targetElements)
    repeat with uiElem in targetElements
        try
            if name of uiElem is targetName then
                if (targetClass is false) or (class of uiElem is targetClass) then return uiElem
            end if
        end try
    end repeat
    return false
end getElementByClassAndName

-- Returns false when the name of win begins with "*" and true otherwise,
-- including when the name cannot be read.
-- NOTE(review): presumably PDF Expert marks unsaved documents with a leading
-- "*" in the window name — confirm.
on isSaved(win)
    try
        set winName to name of win
        if winName begins with "*" then return false
    end try
    return true
end isSaved