blob: 9331fb26f9e3d371444a4eceff6b7ee9ce1076ba (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
# -*- coding: utf-8 -*-
from utils.wikisource import get_page
from utils.djvu_utils import parse_book
import utils.string_utils as su
# Script: load one page of a book from two sources (the DjVu parse and the
# wiki page text), align them, and print the resulting alignment.

# File name of the book; the helpers below receive it with every space
# replaced by an underscore.
book_title = "Bloy - Le Sang du pauvre, Stock, 1932.djvu"
# Alternative input, kept for switching books by hand:
# book_title = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu"
wikibook = book_title.replace(" ", "_")

# Passed as the second argument to both parse_book and get_page.
# NOTE(review): presumably the page number -- confirm against those helpers.
n = 88

parsed_page = parse_book(wikibook, n)
# The first element of the parse result is a sequence of pairs; transpose it
# into two parallel lists.
# NOTE(review): presumably (token, position/metadata) pairs -- verify against
# utils.djvu_utils.parse_book.
ocr_tokens, ocr_meta = map(list, zip(*parsed_page[0]))

wiki_text = get_page(wikibook, n)
simplified_text = su.simplify(wiki_text)

# The alignment is computed on the simplified text, but the printout uses the
# whitespace-split words of the unsimplified page text.
alignment = su.align(simplified_text.split(), ocr_tokens, ocr_meta)
su.print_alignment(wiki_text.split(), ocr_tokens, ocr_meta, alignment[1])
|