# Archives / Snippets / Projets

#!/usr/bin/env python

# coding: utf-8

import sys

# Require exactly two command-line arguments: the input path and the output
# path. Anything else prints the usage text and stops the script.
if len(sys.argv) != 3:
    print("Usage:\npython {} input.file output.file".format(sys.argv[0]))
    # NOTE(review): status 0 reports success to the calling shell even though
    # the script was invoked incorrectly; confirm whether callers rely on it
    # before switching to a non-zero status.
    sys.exit(0)

# Both files are opened in binary mode: the conversion loop decodes every
# input line from UTF-8 itself and writes bytes that are already encoded as
# Latin-1, so the file objects must not decode text or translate newlines.
# Binary mode also gives the same behavior on Python 2 and Python 3.
entree = open(sys.argv[1], 'rb')
sortie = open(sys.argv[2], 'wb')

# Replacement table: each key is a substring looked up in the decoded text,
# each value is the text substituted for it before the Latin-1 encoding step.
special = {
    # Characters replaced by a plain equivalent.
    u'\u0153': 'oe',    # U+0153, "oe" ligature
    u'\u266a': '#',     # U+266A, eighth note (music symbol)
    u'\u2019': "'",     # U+2019, typographic apostrophe / right single quote
    u'\u2026': '...',   # U+2026, horizontal ellipsis
    u'\ufeff': '',      # U+FEFF, byte order mark (zero-width no-break space)
    # HTML italic markup, removed outright.
    '<i>': '',
    '</i>': '',
}

def dic_replace(text, dic):
    """Return *text* with every key of *dic* replaced by its value.

    The replacements are applied one after another, in the iteration order
    of *dic*, each one working on the result of the previous one. A
    replacement value that contains another key may therefore be rewritten
    by a later pass.

    Args:
        text: The string to process.
        dic: Mapping from the substring to search for to its replacement.

    Returns:
        The string obtained once all replacements have been applied. An
        empty mapping returns *text* unchanged.
    """
    for old, new in dic.items():
        text = text.replace(old, new)
    return text

# Convert the input line by line: decode from UTF-8, apply the `special`
# replacements, re-encode as Latin-1 and write the result. The `with`
# statement closes both files when the loop ends or an error escapes, so the
# output is flushed to disk instead of relying on interpreter shutdown.
with entree, sortie:
    for line in entree:
        # Invalid UTF-8 input is not handled here: the UnicodeDecodeError
        # propagates and stops the script.
        text = line.decode('utf-8')
        try:
            decoded = dic_replace(text, special).encode('latin1', 'strict')
        except UnicodeEncodeError:
            # The line still contains a character that Latin-1 cannot
            # represent. It is left out of the output file and reported on
            # stdout: first as read, then as an escaped repr of the decoded
            # text. repr() is used so the report looks the same on Python 2
            # and Python 3 (the former trailing comma printed a tuple on
            # Python 2 only).
            print(line)
            print(repr(text))
        else:
            sortie.write(decoded)