#!/usr/bin/env python
# Time-stamp: <2007-01-28 13:29:21 poser>
# Convert MS Codepage 1250 and 1252 (Winlatin 1 and 2) characters to ASCII
# Where this is not possible, strip them.
# Copyright 2004-2007 William J. Poser (billposer@alum.mit.edu)
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# or go to the web page: http://www.gnu.org/licenses/gpl.txt.
#
# Usage: reads bytes from standard input, writes the converted bytes to
# standard output, and reports skipped bytes plus summary counts on
# standard error.

import sys
import os

# Byte value -> ASCII replacement bytes.
REPLACEMENTS = {
    0x85: b"...",   # Ellipsis
    0x91: b"`",     # Left single quotation mark
    0x92: b"'",     # Right single quotation mark / apostrophe
    0x93: b'"',     # Left double quotation mark
    0x94: b'"',     # Right double quotation mark
    0x96: b"-",     # En-dash / hyphen
    0x97: b"--",    # Em-dash
}

# Diagnostic written to stderr for every byte that is stripped:
# (byte value, 1-based byte offset in the input).
SKIP_MESSAGE = "Skipped non-ASCII character %02X at byte %d\n"

# Upper bound on how much input is held in memory at once.
CHUNK_SIZE = 65536


def convert_bytes(data, offset=0):
    """Translate one chunk of input bytes to ASCII.

    Bytes listed in REPLACEMENTS are replaced by their ASCII equivalent,
    any other byte above 0x7F is dropped, and every remaining byte is
    copied through unchanged.

    data   -- the input bytes (bytes, bytearray, or a Python 2 str).
    offset -- number of input bytes consumed before this chunk; used only
              to compute the positions reported for skipped bytes.

    Returns a tuple (output, replaced, skipped):
    output   -- the converted bytes,
    replaced -- how many input bytes were replaced,
    skipped  -- list of (byte value, 1-based input offset) for each
                byte that was dropped.
    """
    output = bytearray()
    replaced = 0
    skipped = []
    # bytearray() yields integers on both Python 2 and Python 3.
    for index, value in enumerate(bytearray(data)):
        substitute = REPLACEMENTS.get(value)
        if substitute is not None:
            output.extend(substitute)
            replaced += 1
        elif value > 0x7F:
            skipped.append((value, offset + index + 1))
        else:
            output.append(value)
    return bytes(output), replaced, skipped


def main():
    """Filter standard input to standard output and report statistics."""
    # Python 3 exposes the raw byte streams as .buffer; Python 2 file
    # objects are already byte-oriented and have no such attribute.
    source = getattr(sys.stdin, "buffer", sys.stdin)
    sink = getattr(sys.stdout, "buffer", sys.stdout)

    byte_offset = 0
    replacement_cnt = 0
    strip_cnt = 0
    try:
        while True:
            # readline() with a size cap returns as soon as a line is
            # available, but never holds more than CHUNK_SIZE bytes.
            chunk = source.readline(CHUNK_SIZE)
            if not chunk:
                break
            converted, replaced, skipped = convert_bytes(chunk, byte_offset)
            sink.write(converted)
            for value, position in skipped:
                sys.stderr.write(SKIP_MESSAGE % (value, position))
            byte_offset += len(chunk)
            replacement_cnt += replaced
            strip_cnt += len(skipped)
        sink.flush()
    except IOError:
        # An I/O failure (e.g. a closed pipe) ends the run quietly,
        # without the summary.
        sys.exit(0)

    sys.stderr.write("Replaced %3d characters.\n" % replacement_cnt)
    sys.stderr.write("Stripped %3d characters.\n" % strip_cnt)
    sys.exit(0)


if __name__ == "__main__":
    main()