use reasonable timeout for tesseract calls

This commit is contained in:
rootless 2023-10-14 11:09:06 +03:00
parent cfac219b06
commit b7c4e402f9

View File

@@ -1,10 +1,11 @@
import pytesseract
import subprocess
import logging
from .system import randomtempname
from PIL import Image
_logger = logging.getLogger(__name__)
ZONES = ('topleft', 'topright',
'bottomleft', 'bottomright',
'topcenter', 'bottomcenter')
@@ -54,8 +55,7 @@ class Zone:
self.margin_top,
self.margin_right,
self.margin_bottom,
self.margin_left
)
self.margin_left)
@staticmethod
def from_string(s):
@@ -92,7 +92,6 @@ def zonecrop(png_path, z):
elif z.isright():
x1 = iw-z.width
elif z.iscenter():
# not tested
x1 = int(iw/2-z.width/2)
if z.istop():
@@ -116,9 +115,15 @@ def zonecrop(png_path, z):
def img2pagenum(img_file, maxlen):
s = pytesseract.image_to_string(img_file,
lang='eng',
config='--psm 13 -c tessedit_char_whitelist=0123456789')
try:
s = pytesseract.image_to_string(img_file,
timeout=10,
lang='eng',
config='--psm 13 -c tessedit_char_whitelist=0123456789')
except RuntimeError as timeout_error:
_logger.exception(timeout_error)
return None
for r in '_-.,—':
s = s.replace(r, '')
s = s.strip()