Skip to content

workflow_ocr cannot handle long-running OCRs #311

@nihebe

Description

@nihebe

Describe the bug

workflow_ocr throws an error and aborts the flow when the ocrmyPDF-container needs more than 60 seconds to answer. On low-power systems (like a Synology NAS) this is no unusual processing time, especially when more than one file is processed at a time.

System

  • App version: 1.31.0
  • Nextcloud version: 31.0.5
  • PHP version: 8.3.20
  • Environment: Native Debian VM for Nextcloud.
  • ocrmypdf version: OCRmyPDF is deployed with "Workflow OCR Backend"-App (R0Wi-DEV/workflow_ocr_backend, version 1.31.1)

How to reproduce

Steps to reproduce the behavior:

  1. Deploy on a low-power system (I have a VM with 1,5 GB RAM and 2 vCPUs from a Synology-NAS)
  2. Configure any triggers for the flow which makes it process files
  3. Throw three files with ~3 DIN A4 pages of text at it
  4. See error

Server log

{"reqId":"fkUdtAR19u9HzAXHLcxg","level":2,"time":"2025-05-25T20:17:30+00:00","remoteAddr":"","user":"Jeanette","app":"app_api","method":"","url":"--","message":"Error during request to ExApp workflow_ocr_backend: cURL error 28: Operation timed out after 60001 milliseconds with 0 bytes received (see https://curl.haxx.se/libcurl/c/libcurl-errors.html) for http://localhost:23000/process_ocr","userAgent":"--","version":"31.0.5.1","exception":{"Exception":"GuzzleHttp\\Exception\\ConnectException","Message":"cURL error 28: Operation timed out after 60001 milliseconds with 0 bytes received (see https://curl.haxx.se/libcurl/c/libcurl-errors.html) for http://localhost:23000/process_ocr","Code":0,"Trace":[{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/Handler/CurlFactory.php","line":205,"function":"createRejection","class":"GuzzleHttp\\Handler\\CurlFactory","type":"::","args":["*** sensitive parameters replaced ***"]},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/Handler/CurlFactory.php","line":157,"function":"finishError","class":"GuzzleHttp\\Handler\\CurlFactory","type":"::"},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/Handler/CurlHandler.php","line":47,"function":"finish","class":"GuzzleHttp\\Handler\\CurlFactory","type":"::"},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/Middleware.php","line":142,"function":"__invoke","class":"GuzzleHttp\\Handler\\CurlHandler","type":"->"},{"file":"/var/www/nextcloud/lib/private/Http/Client/DnsPinMiddleware.php","line":109,"function":"GuzzleHttp\\{closure}","class":"GuzzleHttp\\Middleware","type":"::","args":["*** sensitive parameters replaced ***"]},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/PrepareBodyMiddleware.php","line":64,"function":"OC\\Http\\Client\\{closure}","class":"OC\\Http\\Client\\DnsPinMiddleware","type":"->","args":["*** sensitive parameters replaced ***"]},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/Middleware.php","line":31,"function":"__invoke","class":"GuzzleHttp\\PrepareBodyMiddleware","type":"->"},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/RedirectMiddleware.php","line":71,"function":"GuzzleHttp\\{closure}","class":"GuzzleHttp\\Middleware","type":"::","args":["*** sensitive parameters replaced ***"]},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/Middleware.php","line":63,"function":"__invoke","class":"GuzzleHttp\\RedirectMiddleware","type":"->"},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/HandlerStack.php","line":75,"function":"GuzzleHttp\\{closure}","class":"GuzzleHttp\\Middleware","type":"::","args":["*** sensitive parameters replaced ***"]},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/Client.php","line":333,"function":"__invoke","class":"GuzzleHttp\\HandlerStack","type":"->"},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/Client.php","line":169,"function":"transfer","class":"GuzzleHttp\\Client","type":"->"},{"file":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/Client.php","line":189,"function":"requestAsync","class":"GuzzleHttp\\Client","type":"->","args":["*** sensitive parameters replaced ***"]},{"file":"/var/www/nextcloud/lib/private/Http/Client/Client.php","line":277,"function":"request","class":"GuzzleHttp\\Client","type":"->"},{"file":"/var/www/nextcloud/apps/app_api/lib/Service/AppAPIService.php","line":99,"function":"post","class":"OC\\Http\\Client\\Client","type":"->","args":["*** sensitive parameters replaced ***"]},{"file":"/var/www/nextcloud/apps/app_api/lib/Service/AppAPIService.php","line":69,"function":"requestToExAppInternal","class":"OCA\\AppAPI\\Service\\AppAPIService","type":"->"},{"file":"/var/www/nextcloud/apps/app_api/lib/PublicFunctions.php","line":42,"function":"requestToExApp","class":"OCA\\AppAPI\\Service\\AppAPIService","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/Wrapper/AppApiWrapper.php","line":53,"function":"exAppRequest","class":"OCA\\AppAPI\\PublicFunctions","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/OcrProcessors/Remote/Client/ApiClient.php","line":86,"function":"exAppRequest","class":"OCA\\WorkflowOcr\\Wrapper\\AppApiWrapper","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/OcrProcessors/Remote/Client/ApiClient.php","line":57,"function":"exAppRequest","class":"OCA\\WorkflowOcr\\OcrProcessors\\Remote\\Client\\ApiClient","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/OcrProcessors/Remote/WorkflowOcrRemoteProcessor.php","line":55,"function":"processOcr","class":"OCA\\WorkflowOcr\\OcrProcessors\\Remote\\Client\\ApiClient","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/Service/OcrService.php","line":160,"function":"ocrFile","class":"OCA\\WorkflowOcr\\OcrProcessors\\Remote\\WorkflowOcrRemoteProcessor","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/Service/OcrService.php","line":128,"function":"runOcrProcess","class":"OCA\\WorkflowOcr\\Service\\OcrService","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/BackgroundJobs/ProcessFileJob.php","line":57,"function":"runOcrProcessWithJobArgument","class":"OCA\\WorkflowOcr\\Service\\OcrService","type":"->"},{"file":"/var/www/nextcloud/lib/public/BackgroundJob/Job.php","line":61,"function":"run","class":"OCA\\WorkflowOcr\\BackgroundJobs\\ProcessFileJob","type":"->"},{"file":"/var/www/nextcloud/lib/public/BackgroundJob/QueuedJob.php","line":43,"function":"start","class":"OCP\\BackgroundJob\\Job","type":"->"},{"file":"/var/www/nextcloud/lib/public/BackgroundJob/QueuedJob.php","line":29,"function":"start","class":"OCP\\BackgroundJob\\QueuedJob","type":"->"},{"file":"/var/www/nextcloud/cron.php","line":170,"function":"execute","class":"OCP\\BackgroundJob\\QueuedJob","type":"->"}],"File":"/var/www/nextcloud/3rdparty/guzzlehttp/guzzle/src/Handler/CurlFactory.php","Line":275,"message":"Error during request to ExApp workflow_ocr_backend: cURL error 28: Operation timed out after 60001 milliseconds with 0 bytes received (see https://curl.haxx.se/libcurl/c/libcurl-errors.html) for http://localhost:23000/process_ocr","exception":{},"CustomMessage":"Error during request to ExApp workflow_ocr_backend: cURL error 28: Operation timed out after 60001 milliseconds with 0 bytes received (see https://curl.haxx.se/libcurl/c/libcurl-errors.html) for http://localhost:23000/process_ocr"}}
{"reqId":"fkUdtAR19u9HzAXHLcxg","level":3,"time":"2025-05-25T20:17:30+00:00","remoteAddr":"","user":"Jeanette","app":"workflow_ocr","method":"","url":"--","message":"Request failed","userAgent":"--","version":"31.0.5.1","data":{"app":"workflow_ocr","path":"/process_ocr","response":"{\"error\":\"cURL error 28: Operation timed out after 60001 milliseconds with 0 bytes received (see https://curl.haxx.se/libcurl/c/libcurl-errors.html) for http://localhost:23000/process_ocr\"}"}}
{"reqId":"fkUdtAR19u9HzAXHLcxg","level":3,"time":"2025-05-25T20:17:30+00:00","remoteAddr":"","user":"--","app":"workflow_ocr","method":"","url":"--","message":"Request failed","userAgent":"--","version":"31.0.5.1","exception":{"Exception":"RuntimeException","Message":"Request failed","Code":0,"Trace":[{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/OcrProcessors/Remote/Client/ApiClient.php","line":57,"function":"exAppRequest","class":"OCA\\WorkflowOcr\\OcrProcessors\\Remote\\Client\\ApiClient","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/OcrProcessors/Remote/WorkflowOcrRemoteProcessor.php","line":55,"function":"processOcr","class":"OCA\\WorkflowOcr\\OcrProcessors\\Remote\\Client\\ApiClient","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/Service/OcrService.php","line":160,"function":"ocrFile","class":"OCA\\WorkflowOcr\\OcrProcessors\\Remote\\WorkflowOcrRemoteProcessor","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/Service/OcrService.php","line":128,"function":"runOcrProcess","class":"OCA\\WorkflowOcr\\Service\\OcrService","type":"->"},{"file":"/var/www/nextcloud/apps/workflow_ocr/lib/BackgroundJobs/ProcessFileJob.php","line":57,"function":"runOcrProcessWithJobArgument","class":"OCA\\WorkflowOcr\\Service\\OcrService","type":"->"},{"file":"/var/www/nextcloud/lib/public/BackgroundJob/Job.php","line":61,"function":"run","class":"OCA\\WorkflowOcr\\BackgroundJobs\\ProcessFileJob","type":"->"},{"file":"/var/www/nextcloud/lib/public/BackgroundJob/QueuedJob.php","line":43,"function":"start","class":"OCP\\BackgroundJob\\Job","type":"->"},{"file":"/var/www/nextcloud/lib/public/BackgroundJob/QueuedJob.php","line":29,"function":"start","class":"OCP\\BackgroundJob\\QueuedJob","type":"->"},{"file":"/var/www/nextcloud/cron.php","line":170,"function":"execute","class":"OCP\\BackgroundJob\\QueuedJob","type":"->"}],"File":"/var/www/nextcloud/apps/workflow_ocr/lib/OcrProcessors/Remote/Client/ApiClient.php","Line":98,"message":"Request failed","exception":{},"CustomMessage":"Request failed"}}

Additional context

This might be considered more a "feature request" than a bug – I hope this is a good way to hand it in anyways. Thanks for your work so far, appreciate it!

Metadata

Metadata

Assignees

Labels

bugSomething isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions