diff options
Diffstat (limited to 'ishtar_common/unicode_csv.py')
-rw-r--r-- | ishtar_common/unicode_csv.py | 79 |
1 files changed, 0 insertions, 79 deletions
diff --git a/ishtar_common/unicode_csv.py b/ishtar_common/unicode_csv.py deleted file mode 100644 index d0d39f7fb..000000000 --- a/ishtar_common/unicode_csv.py +++ /dev/null @@ -1,79 +0,0 @@ -import csv, codecs, cStringIO - -def utf_8_encoder(unicode_csv_data): - for line in unicode_csv_data: - yield line.encode('utf-8') - -def unicode_csv_reader(unicode_csv_data, dialect=None, reference_header=[], - **kwargs): - if not dialect: - dialect = csv.Sniffer().sniff(unicode_csv_data[0]) - # csv.py don't like unicode - dialect.delimiter = str(dialect.delimiter) - dialect.quotechar = str(dialect.quotechar) - # csv.py doesn't do Unicode; encode temporarily as UTF-8: - csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), - dialect=dialect, **kwargs) - for row in csv_reader: - # decode UTF-8 back to Unicode, cell by cell: - yield [unicode(cell, 'utf-8') for cell in row] - -class UTF8Recoder: - """ - Iterator that reads an encoded stream and reencodes the input to UTF-8 - """ - def __init__(self, f, encoding): - self.reader = codecs.getreader(encoding)(f) - - def __iter__(self): - return self - - def next(self): - return self.reader.next().encode("utf-8") - -class UnicodeReader: - """ - A CSV reader which will iterate over lines in the CSV file "f", - which is encoded in the given encoding. - """ - - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - f = UTF8Recoder(f, encoding) - self.reader = csv.reader(f, dialect=dialect, **kwds) - - def next(self): - row = self.reader.next() - return [unicode(s, "utf-8") for s in row] - - def __iter__(self): - return self - -class UnicodeWriter: - """ - A CSV writer which will write rows to CSV file "f", - which is encoded in the given encoding. - """ - - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - # Redirect output to a queue - self.queue = cStringIO.StringIO() - self.writer = csv.writer(self.queue, dialect=dialect, **kwds) - self.stream = f - self.encoder = codecs.getincrementalencoder(encoding)() - - def writerow(self, row): - self.writer.writerow([s.encode("utf-8") for s in row]) - # Fetch UTF-8 output from the queue ... - data = self.queue.getvalue() - data = data.decode("utf-8") - # ... and reencode it into the target encoding - data = self.encoder.encode(data) - # write to the target stream - self.stream.write(data) - # empty queue - self.queue.truncate(0) - - def writerows(self, rows): - for row in rows: - self.writerow(row) - |