improve page number recognition

This commit is contained in:
rootless 2023-10-14 10:53:56 +03:00
parent 62cbd87bad
commit d3b8bce6df

View File

@@ -117,8 +117,8 @@ def zonecrop(png_path, z):
def img2pagenum(img_file, maxlen):
s = pytesseract.image_to_string(img_file,
-lang='rus',
-config='--psm 11')
+lang='eng',
+config='--psm 13 -c tessedit_char_whitelist=0123456789')
for r in '_-.,—':
s = s.replace(r, '')
s = s.strip()