Add PDFExpert.scpt

This commit is contained in:
thanhtl 2025-05-23 15:46:37 +07:00
parent 33f9dcdafe
commit 69e84c1880

197
PDFExpert.scpt Normal file
View File

@ -0,0 +1,197 @@
(*
Batch-OCR selected PDFs in DEVONthink using PDF Expert
Originally by Alex Wellerstein; adapted by ChatGPT, May 2025.
Released to the public domain: absolutely no warranties, guarantees, promises, *anything*.
Tested on macOS 14.6.1 • DEVONthink 3.9.x • PDF Expert 3.10.10
*)
------------------------------------------------------------
-- 1. COLLECT THE SELECTION FROM DEVONTHINK
------------------------------------------------------------
-- Grab whatever is currently selected in DEVONthink. The application is
-- addressed by bundle ID, so the script does not depend on the app's name.
tell application id "DNtp" -- DEVONthink's four-letter bundle ID
    activate
    set theSelection to selection
end tell
if theSelection is {} then
    display dialog "Nothing is selected in DEVONthink." buttons {"Cancel"} default button 1 with icon caution
    return
end if
-- Build a plain AppleScript list of POSIX paths for the selected *PDF* records.
set theFiles to {}
repeat with rec in theSelection
    -- Best effort: a record whose properties cannot be read is skipped rather
    -- than aborting the whole batch.
    try
        tell application id "DNtp"
            if type of rec is PDF document then -- skip anything that isn't a PDF
                -- DEVONthink's `path` property is already a POSIX path, so it is
                -- used as-is instead of being pushed through `POSIX path of` again.
                set recPath to (path of rec) as string
                -- A record with no file on disk has nothing to open; leave it out.
                if recPath is not "" then set end of theFiles to recPath
            end if
        end tell
    end try
end repeat
if theFiles is {} then
    display dialog "None of the selected items is a PDF." buttons {"Cancel"} default button 1 with icon caution
    return
end if
------------------------------------------------------------
-- 2. SET UP A PROGRESS BAR
------------------------------------------------------------
-- One progress step per queued PDF. The counter starts at zero and the
-- two description lines are what the progress UI shows before work begins.
set totalFiles to count of theFiles
set progress description to "Processing PDFs with PDF Expert…"
set progress additional description to "Starting…"
set progress total steps to totalFiles
set progress completed steps to 0
------------------------------------------------------------
-- 3. LOOP THROUGH EACH PDF AND RUN THE OCR ROUTINE
------------------------------------------------------------
repeat with i from 1 to totalFiles
    set thePath to item i of theFiles
    set progress additional description to "Processing " & i & " of " & totalFiles & "…"
    my pdfexpert_ocr(thePath) -- drive PDF Expert's UI for this one file
    -- Refresh the record inside DEVONthink so its thumbnails & metadata update.
    -- Best effort: a failed refresh must not stop the batch.
    try
        tell application id "DNtp"
            set recList to lookup records with path thePath -- returns a list (usually length 1)
            repeat with r in recList
                -- NOTE(review): confirm `update record … without asking` against
                -- DEVONthink's scripting dictionary; any error here is swallowed
                -- by the enclosing try.
                update record r without asking -- silent refresh
            end repeat
        end tell
    end try
    -- Count the file as completed only after it has actually been processed,
    -- so the bar never shows 100% while the last file is still running.
    set progress completed steps to i
    -- Free up RAM every ten files by quitting PDF Expert; the next `open`
    -- in pdfexpert_ocr launches it again.
    if i mod 10 is 0 then
        tell application "PDF Expert" to quit
        delay 1
    end if
end repeat
log "All done!"
display dialog "OCR finished for " & totalFiles & " PDF(s)." buttons {"OK"} default button 1
--------------------------------------------------------------------------------
-- ↓↓↓ HANDLERS BELOW: THE ORIGINAL PER-FILE OCR ROUTINE AND ITS UTILITIES ↓↓↓
--------------------------------------------------------------------------------
on pdfexpert_ocr(filePath)
    -- OCR a single PDF by driving PDF Expert's user interface via System Events.
    --   filePath: POSIX path of the PDF to process.
    -- Steps: open the file in PDF Expert, click "Recognize Text" and then
    -- "Recognize...", wait for the progress sheet to go away, save, and close
    -- the window if the save appears to have succeeded.
    -- Errors: shows a dialog and raises error -128 when no window with the
    -- expected title is found; raises a descriptive error when one of the
    -- expected controls is missing from the window.
    log ("Starting OCR for " & filePath)
    -- Open with PDF Expert explicitly so the routine also works when another
    -- application is the default PDF viewer.
    do shell script "open -a " & quoted form of "PDF Expert" & " " & quoted form of filePath
    delay 0.5
    activate application "PDF Expert"
    delay 0.5
    set fileTitle to getFilename(filePath, ".pdf")
    tell application "System Events"
        tell process "PDF Expert"
            -- Locate the window whose title matches the file name. If the first
            -- pass finds nothing, wait two seconds and try exactly once more.
            set win to false
            repeat with attempt from 1 to 2
                repeat with w in every window
                    if (title of w) is fileTitle then set win to w
                end repeat
                if win is not false then exit repeat
                if attempt is 1 then delay 2 -- give PDF Expert a moment more
            end repeat
            if (win is false) then
                display dialog "Could not find window for “" & fileTitle & "” in PDF Expert." buttons {"Cancel"} default button 1 with icon stop
                error number -128
            end if
            -- click “Recognize Text” in Scan & OCR
            set ocrTab to my getElementByClassAndName("Recognize Text", false, entire contents of win)
            if ocrTab is false then error "Could not find the “Recognize Text” control in the PDF Expert window for “" & fileTitle & "”."
            click ocrTab
            -- click “Recognize…” in the side bar
            set recognizeButton to my getElementByClassAndName("Recognize...", false, entire contents of win)
            if recognizeButton is false then error "Could not find the “Recognize...” control in the PDF Expert window for “" & fileTitle & "”."
            click recognizeButton
            -- choose “All” pages when the dialog appears (if >1 page)
            if exists radio button "All" of sheet 1 of win then
                click radio button "All" of sheet 1 of win
                click button "Apply" of sheet 1 of win
            end if
            delay 1 -- short head-start before polling
            repeat -- poll until the progress sheet disappears
                try
                    if not (exists win) or (count sheet of win) = 0 then exit repeat
                    if not (exists progress indicator 1 of sheet 1 of win) then exit repeat
                on error
                    exit repeat -- any UI error = assume finished
                end try
                delay 0.5 -- pause between polls instead of busy-waiting on System Events
            end repeat
            delay 1 -- extra cushion
            -- save & close
            set fileMenu to menu "File" of menu bar item "File" of menu bar 1
            click my getElementByClassAndName("Save", false, entire contents of fileMenu)
            delay 1
            -- Close only when the front window no longer looks unsaved;
            -- otherwise leave it open and just log a warning.
            if my isSaved(front window) then
                click my getElementByClassAndName("Close Window", false, entire contents of fileMenu)
            else
                log "WARNING: file might not have saved!"
            end if
        end tell
    end tell
end pdfexpert_ocr
-- utility handlers (unchanged from your original)
on getFilename(theFile, theExtension)
    -- Return the Finder name of the file at POSIX path theFile, with
    -- theExtension removed from the end when it is present.
    --   theFile:      POSIX path of an existing file.
    --   theExtension: suffix to strip (e.g. ".pdf"), or "" to keep the full name.
    -- The suffix is removed at most once, so "report.pdf.pdf" yields
    -- "report.pdf" rather than "report". A name consisting of nothing but the
    -- suffix is returned unchanged.
    -- Resolve the path before talking to Finder, so the `POSIX file` coercion
    -- is handled by AppleScript itself rather than inside the Finder tell.
    set fileAlias to (POSIX file theFile) as alias
    tell application "Finder" to set fName to name of fileAlias
    if theExtension is "" then return fName
    set extLength to length of theExtension
    if ((length of fName) > extLength) and (fName ends with theExtension) then
        return text 1 thru -(extLength + 1) of fName
    end if
    return fName
end getFilename
on trimText(theText, theCharactersToTrim, theTrimDirection)
    -- Strip every leading and/or trailing repetition of theCharactersToTrim
    -- from theText.
    --   theText:             the text to trim.
    --   theCharactersToTrim: the substring to remove; "" leaves theText as-is.
    --   theTrimDirection:    "beginning", "end", or "both"; any other value
    --                        leaves theText unchanged.
    -- Returns the trimmed text, or "" when trimming consumes everything.
    set theTrimLength to length of theCharactersToTrim
    -- Nothing to trim; also ensures the loops below always shorten theText.
    if theTrimLength is 0 then return theText
    if theTrimDirection is in {"beginning", "both"} then
        repeat while theText begins with theCharactersToTrim
            if (length of theText) <= theTrimLength then return ""
            -- `text m thru n` yields a substring directly. The list form
            -- `characters m thru n … as string` would be re-joined using the
            -- current text item delimiters and could insert separators.
            set theText to text (theTrimLength + 1) thru -1 of theText
        end repeat
    end if
    if theTrimDirection is in {"end", "both"} then
        repeat while theText ends with theCharactersToTrim
            if (length of theText) <= theTrimLength then return ""
            set theText to text 1 thru -(theTrimLength + 1) of theText
        end repeat
    end if
    return theText
end trimText
on getElementByClassAndName(targetName, targetClass, targetElements)
    -- Scan targetElements in order and return the first entry whose name is
    -- targetName and, unless targetClass is false, whose class is targetClass.
    --   targetName:     the name to match.
    --   targetClass:    a class to match as well, or false to accept any class.
    --   targetElements: the list of elements to search.
    -- Returns the matching entry, or false when nothing matches. An entry whose
    -- name or class cannot be read is skipped.
    set anyClassAccepted to (targetClass is false)
    repeat with candidate in targetElements
        try
            if name of candidate is targetName then
                if anyClassAccepted or (class of candidate is targetClass) then return candidate
            end if
        end try
    end repeat
    return false
end getElementByClassAndName
on isSaved(win)
    -- Report whether a window looks saved, judged only by its name: a name
    -- starting with "*" counts as unsaved. If the name cannot be read at all,
    -- the window is treated as saved.
    --   win: the window (or any object with a name) to inspect.
    -- Returns true or false.
    set looksSaved to true
    try
        if (name of win) starts with "*" then set looksSaved to false
    end try
    return looksSaved
end isSaved