use reasonable timeout for tesseract calls
This commit is contained in:
parent
cfac219b06
commit
b7c4e402f9
@ -1,10 +1,11 @@
|
||||
import pytesseract
|
||||
import subprocess
|
||||
import logging
|
||||
|
||||
from .system import randomtempname
|
||||
from PIL import Image
|
||||
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
ZONES = ('topleft', 'topright',
|
||||
'bottomleft', 'bottomright',
|
||||
'topcenter', 'bottomcenter')
|
||||
@ -54,8 +55,7 @@ class Zone:
|
||||
self.margin_top,
|
||||
self.margin_right,
|
||||
self.margin_bottom,
|
||||
self.margin_left
|
||||
)
|
||||
self.margin_left)
|
||||
|
||||
@staticmethod
|
||||
def from_string(s):
|
||||
@ -92,7 +92,6 @@ def zonecrop(png_path, z):
|
||||
elif z.isright():
|
||||
x1 = iw-z.width
|
||||
elif z.iscenter():
|
||||
# not tested
|
||||
x1 = int(iw/2-z.width/2)
|
||||
|
||||
if z.istop():
|
||||
@ -116,9 +115,15 @@ def zonecrop(png_path, z):
|
||||
|
||||
|
||||
def img2pagenum(img_file, maxlen):
|
||||
s = pytesseract.image_to_string(img_file,
|
||||
lang='eng',
|
||||
config='--psm 13 -c tessedit_char_whitelist=0123456789')
|
||||
try:
|
||||
s = pytesseract.image_to_string(img_file,
|
||||
timeout=10,
|
||||
lang='eng',
|
||||
config='--psm 13 -c tessedit_char_whitelist=0123456789')
|
||||
except RuntimeError as timeout_error:
|
||||
_logger.exception(timeout_error)
|
||||
return None
|
||||
|
||||
for r in '_-.,—':
|
||||
s = s.replace(r, '')
|
||||
s = s.strip()
|
||||
|
Loading…
x
Reference in New Issue
Block a user