(*
	Batch-OCR selected PDFs in DEVONthink using PDF Expert
	Originally by Alex Wellerstein – adapted by ChatGPT, May 2025
	Released to the public domain: absolutely no warranties, guarantees, promises, *anything*.
	Tested on macOS 14.6.1 • DEVONthink 3.9.x • PDF Expert 3.10.10
*)

------------------------------------------------------------
-- 1. COLLECT THE SELECTION FROM DEVONTHINK
------------------------------------------------------------
tell application id "DNtp" -- DEVONthink's four-letter bundle ID
	activate
	set theSelection to selection
end tell

if theSelection is {} then
	display dialog "Nothing is selected in DEVONthink." buttons {"Cancel"} default button 1 with icon caution
	return
end if

-- Build a plain AppleScript list of POSIX paths for the selected *PDF* records.
set theFiles to {}
repeat with rec in theSelection
	-- Best effort: a record that cannot be inspected is skipped, not fatal.
	try
		tell application id "DNtp"
			if type of rec is PDF document then -- skip anything that isn't a PDF
				-- DEVONthink's scripting dictionary documents `path` as a POSIX path,
				-- and it is used as one below (`open`, `lookup records with path`).
				-- It is therefore stored as-is: wrapping it in `POSIX path of` would
				-- reinterpret it as an HFS path and can mangle the slashes.
				set recPath to (path of rec) as string
				if recPath is not "" then set end of theFiles to recPath
			end if
		end tell
	end try
end repeat

if theFiles is {} then
	display dialog "None of the selected items is a PDF." buttons {"Cancel"} default button 1 with icon caution
	return
end if

------------------------------------------------------------
-- 2. SET UP A PROGRESS BAR
------------------------------------------------------------
set totalFiles to length of theFiles
set progress total steps to totalFiles
set progress completed steps to 0
set progress description to "Processing PDFs with PDF Expert…"
set progress additional description to "Starting…"

------------------------------------------------------------
-- 3. LOOP THROUGH EACH PDF AND RUN THE OCR ROUTINE
------------------------------------------------------------
repeat with i from 1 to totalFiles
	set thePath to item i of theFiles
	set progress additional description to "Processing " & i & " of " & totalFiles & "…"
	
	my pdfexpert_ocr(thePath) -- UI-scripted OCR handler defined in this script
	
	-- Refresh the record inside DEVONthink so its thumbnails & metadata update.
	-- Wrapped in try: a failed refresh must not abort the rest of the batch.
	try
		tell application id "DNtp"
			set recList to lookup records with path thePath -- returns a list (usually length 1)
			repeat with r in recList
				update record r without asking -- silent refresh
			end repeat
		end tell
	end try
	
	-- Count the file as completed only once it has actually been processed.
	set progress completed steps to i
	
	-- Free up RAM every ten files by quitting PDF Expert; the next `open` relaunches it.
	if i mod 10 is 0 then
		tell application "PDF Expert" to quit
		delay 1
	end if
end repeat

log "All done!"
display dialog "OCR finished for " & totalFiles & " PDF(s)." buttons {"OK"} default button 1

--------------------------------------------------------------------------------
-- ↓↓↓ HANDLERS CARRIED OVER FROM THE ORIGINAL SCRIPT (OCR ROUTINE + UTILITIES) ↓↓↓
--------------------------------------------------------------------------------
-- Runs PDF Expert's "Recognize Text" (OCR) on a single PDF through UI scripting,
-- then saves the document and closes its window.
--   filePath : POSIX path of the PDF to process.
-- The file is opened with the shell `open` command, so PDF Expert must be the
-- default PDF viewer. All interaction goes through System Events.
-- Errors:
--   * error -128 (user cancelled) when no PDF Expert window matches the file name;
--   * a descriptive error when one of the expected controls / menu items is missing.
on pdfexpert_ocr(filePath)
	log ("Starting OCR for " & filePath)
	do shell script "open " & quoted form of filePath -- assumes PDF Expert is the default PDF viewer
	delay 0.5
	activate application "PDF Expert"
	delay 0.5
	-- Window titles are matched against the file name without its ".pdf" suffix.
	set fileTitle to getFilename(filePath, ".pdf")
	tell application "System Events"
		tell process "PDF Expert"
			
			-- Locate the window that matches the file name. If the first scan finds
			-- nothing, give PDF Expert two more seconds and scan once more.
			set win to false
			repeat with attempt from 1 to 2
				repeat with w in every window
					if (title of w) is fileTitle then set win to w
				end repeat
				if win is not false then exit repeat
				if attempt is 1 then delay 2
			end repeat
			
			if win is false then
				display dialog "Could not find window for “" & fileTitle & "” in PDF Expert." buttons {"Cancel"} default button 1 with icon stop
				error number -128
			end if
			
			-- Click "Recognize Text" in Scan & OCR.
			-- getElementByClassAndName returns false when nothing matches, so fail
			-- with a clear message instead of letting `click false` blow up.
			set ocrTool to my getElementByClassAndName("Recognize Text", false, entire contents of win)
			if ocrTool is false then error "Could not find the “Recognize Text” control for “" & fileTitle & "” in PDF Expert."
			click ocrTool
			
			-- Click "Recognize..." in the side bar. The window contents are fetched
			-- again because this lookup runs after the click above.
			set recognizeButton to my getElementByClassAndName("Recognize...", false, entire contents of win)
			if recognizeButton is false then error "Could not find the “Recognize...” button for “" & fileTitle & "” in PDF Expert."
			tell recognizeButton
				click
				-- Choose "All" pages when the page-range sheet appears (if >1 page).
				if exists radio button "All" of sheet 1 of win then
					click radio button "All" of sheet 1 of win
					click button "Apply" of sheet 1 of win
				end if
			end tell
			
			delay 1 -- short head-start before polling
			
			-- Poll until the progress sheet disappears.
			repeat
				try
					if not (exists win) or (count sheet of win) = 0 then exit repeat
					if not (exists progress indicator 1 of sheet 1 of win) then exit repeat
				on error
					exit repeat -- any UI error = assume finished
				end try
				delay 0.25 -- pause between polls instead of spinning on System Events
			end repeat
			
			delay 1 -- extra cushion
			
			-- Save & close via the File menu.
			set fileMenu to menu "File" of menu bar item "File" of menu bar 1
			set saveItem to my getElementByClassAndName("Save", false, entire contents of fileMenu)
			if saveItem is false then error "Could not find the “Save” menu item in PDF Expert."
			click saveItem
			delay 1
			-- isSaved inspects the front window's name for an unsaved marker.
			if my isSaved(front window) then
				set closeItem to my getElementByClassAndName("Close Window", false, entire contents of fileMenu)
				if closeItem is false then error "Could not find the “Close Window” menu item in PDF Expert."
				click closeItem
			else
				-- Leave the window open so unsaved OCR results are not discarded.
				log "WARNING: file might not have saved!"
			end if
		end tell
	end tell
end pdfexpert_ocr

-- Utility handlers (carried over from the original script)
-- Returns the Finder name of the file at POSIX path theFile.
--   theFile      : POSIX path of an existing file (it is coerced to an alias).
--   theExtension : suffix to strip from the end of the name via trimText,
--                  or "" to return the name untouched.
on getFilename(theFile, theExtension)
	tell application "Finder"
		set finderName to name of (POSIX file theFile as alias)
	end tell
	-- No suffix requested: hand back the name exactly as Finder reports it.
	if theExtension is "" then return finderName
	return trimText(finderName, theExtension, "end")
end getFilename
-- Repeatedly strips theCharactersToTrim from one or both ends of theText.
--   theText             : text to trim.
--   theCharactersToTrim : the substring to remove (removed as many times as it repeats).
--   theTrimDirection    : "beginning", "end" or "both"; any other value trims nothing.
-- Returns the trimmed text, or "" when trimming would consume the whole text.
on trimText(theText, theCharactersToTrim, theTrimDirection)
	set theTrimLength to length of theCharactersToTrim
	-- Nothing to trim; this also rules out the loops below running without progress.
	if theTrimLength is 0 then return theText
	
	if theTrimDirection is in {"beginning", "both"} then
		repeat while theText begins with theCharactersToTrim
			try
				-- `text m thru n` yields a substring directly. The older
				-- `characters m thru n ... as string` form joins the characters with
				-- the current text item delimiters and corrupts the result when
				-- those are not empty.
				set theText to text (theTrimLength + 1) thru -1 of theText
			on error
				-- The range is out of bounds: the whole text was the trim string.
				return ""
			end try
		end repeat
	end if
	
	if theTrimDirection is in {"end", "both"} then
		repeat while theText ends with theCharactersToTrim
			try
				set theText to text 1 thru -(theTrimLength + 1) of theText
			on error
				return ""
			end try
		end repeat
	end if
	
	return theText
end trimText
-- Finds the first item of targetElements whose name is targetName.
--   targetName     : name to look for.
--   targetClass    : class the item must also have, or false to accept any class.
--   targetElements : list of candidates to scan in order.
-- Returns a reference to the matching item, or false when nothing matches.
-- Items whose name or class cannot be read are silently skipped.
on getElementByClassAndName(targetName, targetClass, targetElements)
	repeat with candidate in targetElements
		try
			if name of candidate is targetName then
				-- Name matches; accept right away when no class filter was given.
				if targetClass is false then return candidate
				if class of candidate is targetClass then return candidate
			end if
		end try
	end repeat
	return false
end getElementByClassAndName
-- Reports whether win looks saved, judged only by its name.
--   win : object with a readable name (a window in this script).
-- Returns false when the name begins with "*", true otherwise. If the name
-- cannot be read at all, the window is treated as saved.
on isSaved(win)
	set looksSaved to true
	try
		if (name of win) begins with "*" then set looksSaved to false
	end try
	return looksSaved
end isSaved