improve page number recognition
This commit is contained in:
parent
62cbd87bad
commit
d3b8bce6df
@ -117,8 +117,8 @@ def zonecrop(png_path, z):
|
||||
|
||||
def img2pagenum(img_file, maxlen):
|
||||
s = pytesseract.image_to_string(img_file,
|
||||
lang='rus',
|
||||
config='--psm 11')
|
||||
lang='eng',
|
||||
config='--psm 13 -c tessedit_char_whitelist=0123456789')
|
||||
for r in '_-.,—':
|
||||
s = s.replace(r, '')
|
||||
s = s.strip()
|
||||
|
Loading…
x
Reference in New Issue
Block a user