TODO:

sub NormalizeText {
    my ($norm_text) = @_;

# language-independent part:
    $norm_text =~ s/<skipped>//g; # strip "skipped" tags
    $norm_text =~ s/-\n//g; # strip end-of-line hyphenation and join lines
    ...

# language-dependent part (assuming Western languages):
    $norm_text = " $norm_text ";
    $norm_text =~ tr/[A-Z]/[a-z]/ unless $preserve_case;
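    # BEWARE! I ADDED THIS BY HAND !!!
    # NOTE(review): the tr/// on accented letters maps correctly only if this
    # script's source encoding and the encoding of $norm_text agree (a
    # single-byte encoding, or `use utf8` with decoded input) -- TODO confirm.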
    $norm_text =~ tr/ÁÉÍÓÚŮČĎĚŘŠŤŽ/áéíóúůčďěřšťž/ unless $preserve_case;
    # BEWARE! I ADDED THIS BY HAND !!!