use reasonable timeout for tesseract calls

This commit is contained in:
rootless 2023-10-14 11:09:06 +03:00
parent cfac219b06
commit b7c4e402f9

View File

@ -1,10 +1,11 @@
import pytesseract import pytesseract
import subprocess import subprocess
import logging
from .system import randomtempname from .system import randomtempname
from PIL import Image from PIL import Image
_logger = logging.getLogger(__name__)
ZONES = ('topleft', 'topright', ZONES = ('topleft', 'topright',
'bottomleft', 'bottomright', 'bottomleft', 'bottomright',
'topcenter', 'bottomcenter') 'topcenter', 'bottomcenter')
@ -54,8 +55,7 @@ class Zone:
self.margin_top, self.margin_top,
self.margin_right, self.margin_right,
self.margin_bottom, self.margin_bottom,
self.margin_left self.margin_left)
)
@staticmethod @staticmethod
def from_string(s): def from_string(s):
@ -92,7 +92,6 @@ def zonecrop(png_path, z):
elif z.isright(): elif z.isright():
x1 = iw-z.width x1 = iw-z.width
elif z.iscenter(): elif z.iscenter():
# not tested
x1 = int(iw/2-z.width/2) x1 = int(iw/2-z.width/2)
if z.istop(): if z.istop():
@ -116,9 +115,15 @@ def zonecrop(png_path, z):
def img2pagenum(img_file, maxlen): def img2pagenum(img_file, maxlen):
try:
s = pytesseract.image_to_string(img_file, s = pytesseract.image_to_string(img_file,
timeout=10,
lang='eng', lang='eng',
config='--psm 13 -c tessedit_char_whitelist=0123456789') config='--psm 13 -c tessedit_char_whitelist=0123456789')
except RuntimeError as timeout_error:
_logger.exception(timeout_error)
return None
for r in '_-.,—': for r in '_-.,—':
s = s.replace(r, '') s = s.replace(r, '')
s = s.strip() s = s.strip()