|
|
|
#! /usr/bin/env python
|
|
|
|
|
|
|
|
import sys, string, codecs, os
|
|
|
|
|
|
|
|
# TODO: currently the 78 chars are *without* the quotes, while for Gettext it is *with* the quotes
|
|
|
|
# FIXME: it seems possible to get lines bigger than 80 characters.
|
|
|
|
# Target width of a wrapped line, counted WITHOUT the surrounding quotes.
max_length = 78

# Tags in front of which splitit() starts a new output line.
wrap_before = [
    # headings
    '<h1>', '<h2>', '<h3>', '<h4>', '<h5>', '<h6>',
    # paragraphs and line breaks
    '<p>', '<br>', '<br/>',
    # lists
    '<ol>', '<ul>', '<li>',
    # tables
    '<table>', '<th>', '<tr>', '<td>',
    # other block-level markup
    '<center>', '<blockquote>', '<pre>', '<hr>', '<hr/>',
]
|
|
|
|
|
|
|
|
### TODO: try to support any charset, not only UTF-8 (so that it can be used outside TDE)
|
|
|
|
|
|
|
|
def _write_quoted(outfile, text):
    """Write *text* to *outfile* as one double-quoted, UTF-8 encoded line."""
    outfile.write(('"%s"\n' % text).encode('utf-8'))


def _is_newline_escape(message, index):
    """Return a true value if message[index] completes a newline escape.

    That is the case when the character is an 'n' preceded by an ODD number
    of backslashes: an even run consists only of escaped backslashes, so the
    'n' is then a plain letter.
    """
    if message[index] != 'n':
        return 0
    backslashes = 0
    pos = index - 1
    while pos >= 0 and message[pos] == '\\':
        backslashes += 1
        pos -= 1
    return backslashes % 2


def splitit( start, message, outfile, width=None, break_tags=None ):
    """Write one PO entry field to *outfile*, wrapped over several lines.

    start      -- keyword prefix including its trailing space (for example
                  "msgid "); when empty, no keyword line is written.
    message    -- the field's text, still in escaped form and without the
                  surrounding quotes.
    outfile    -- object with a write() method; it receives UTF-8 encoded
                  data only.
    width      -- wrap width, not counting the quotes; defaults to the
                  module-level max_length.
    break_tags -- tags before which a new line is started; defaults to the
                  module-level wrap_before.

    A field with a keyword that is shorter than the width and contains no
    backslash-n sequence is written on a single line.  Otherwise the keyword
    is followed by an empty string and the text is emitted in pieces: a
    piece ends after every newline escape, before every tag of *break_tags*
    and, once the width is exceeded, after the last '>' (if beyond column
    50), else after the last space, else after the last comma, else at the
    current position (never directly after a backslash).
    """
    if width is None:
        width = max_length
    if break_tags is None:
        break_tags = wrap_before

    # print start+"\""+message+"\"" # DEBUG
    if start:
        if len(message) + len(start) < width and message.find('\\n') == -1:
            outfile.write(('%s"%s"\n' % (start, message)).encode('utf-8'))
            return
        outfile.write(('%s""\n' % start).encode('utf-8'))

    index = 0
    last_brace = 0
    last_space = 0
    last_comma = 0
    while index < len(message):
        char = message[index]
        # Number of leading characters to emit now; 0 means "no break here"
        # (a real break always emits at least one character).
        cut = 0
        if _is_newline_escape(message, index):
            cut = index + 1
        else:
            if char == '>':
                last_brace = index
            elif char == ' ':
                last_space = index
            elif char == ',':
                last_comma = index
            elif char == '<' and index > 0:
                # A tag at the very start of a piece must not produce an
                # empty line, hence the index > 0 guard.
                for tag in break_tags:
                    if message.startswith(tag, index):
                        cut = index
                        break
            if not cut and index > width:
                if last_brace > 50:
                    index = last_brace
                    # Keep the blanks following the '>' on the same line.
                    while index < len(message) - 1 and message[index + 1] == ' ':
                        index += 1
                elif last_space != 0:
                    index = last_space
                elif last_comma != 0:
                    index = last_comma
                else:
                    # No natural break point: do not cut an escape in two.
                    while index > 0 and message[index] == '\\':
                        index -= 1
                cut = index + 1
        if cut:
            _write_quoted(outfile, message[:cut])
            message = message[cut:]
            index = 0
            last_brace = 0
            last_space = 0
            last_comma = 0
            continue
        index += 1

    if message:
        _write_quoted(outfile, message)
|
|
|
|
|
|
|
|
# Select the mode used to open the PO files for reading.
if 0x02030000 <= sys.hexversion < 0x03000000:
    # Python 2.3 up to 2.7: ask explicitly for "Universal Newline Support"
    open_type = "rU"
else:
    # Python older than 2.3 has no 'U' flag.  Python 3 translates newlines
    # in text mode by default, and the 'U' flag was removed in Python 3.11.
    open_type = "r"  # Normal open for read
|
|
|
|
### TODO: even in the case of a parse error, the script could try to process the next file(s) instead of exiting.
|
|
|
|
# Keywords that are directly followed by the quoted string.
_PLAIN_KEYWORDS = ("msgid ", "msgstr ", "msgctxt ", "msgid_plural ")


def _fail(path, problem, lineno):
    """Report *problem* for line *lineno* of *path* on stdout and exit(1)."""
    sys.stdout.write("%s %s %d\n" % (path, problem, lineno))
    sys.exit(1)


def _unquote(line, offset):
    """Return the text of the quoted string that follows line[:offset].

    The last character of the (stripped) line and the first character after
    optional leading blanks are taken to be the quotes and are dropped.
    """
    return line[offset:-1].lstrip()[1:]


def _parse_keyword(line):
    """Split a stripped keyword line into its prefix and its string.

    Returns (start, text, None) on success, where start is the keyword
    prefix including the trailing space, or (None, None, problem) when the
    line does not begin with a known keyword.
    """
    for keyword in _PLAIN_KEYWORDS:
        if line.startswith(keyword):
            return keyword, _unquote(line, len(keyword)), None
    if not line.startswith("msgstr["):
        return None, None, "parsing error in line"
    # For most languages, there will be only one digit
    if line[8:10] == "] ":
        if line[7].isdigit():
            return line[:10], _unquote(line, 10), None
        return None, None, "not-a-digit error for mgstr[] in line"
    posdigit = 7  # The first digit is at position 7
    while posdigit < len(line) and line[posdigit].isdigit():
        posdigit += 1
    if posdigit > 7 and line[posdigit:posdigit + 2] == "] ":
        posdigit += 2  # skip ] and the space
        return line[:posdigit], _unquote(line, posdigit), None
    return None, None, "parse error after msgstr[ in line"


def _rewrap_stream(path, orig_file, new_file):
    """Copy the PO data of *orig_file* to *new_file*, re-wrapping strings.

    Blank lines and lines starting with '#' are copied unchanged.  The
    string pieces of every keyword are joined and handed to splitit().
    *path* is used in error messages only; any error ends the script with
    exit status 1.
    """
    last = ''
    start = ''
    index = 0
    while 1:  # python 2.1 has no True ;)
        raw = orig_file.readline()
        index += 1
        if not raw:
            break
        line = None
        if raw != '\n' and raw[0] != '#':
            try:
                line = unicode(raw, 'utf-8').strip()
            except UnicodeError:
                # The raw bytes cannot be parsed or re-encoded reliably.
                _fail(path, "invalid UTF-8 in line", index)
        if not line:
            # Blank line, comment, or a line holding only whitespace:
            # flush the pending string and copy the line unchanged.
            splitit(start, last, new_file)
            start = ''
            last = ''
            new_file.write(raw)
            continue
        if line[0] == '"' and line[-1:] == '"':
            # Continuation of the current string
            last += line[1:-1]
            continue
        # new message
        splitit(start, last, new_file)
        start, last, problem = _parse_keyword(line)
        if problem:
            _fail(path, problem, index)
    splitit(start, last, new_file)


def _rewrap_file(path):
    """Re-wrap the PO file *path* in place, going through *path*.new.

    Both files are closed even when processing fails; the original is only
    replaced after the whole file was processed.
    """
    orig_file = open(path, open_type)
    try:
        new_file = open(path + ".new", 'w')
        try:
            _rewrap_stream(path, orig_file, new_file)
        finally:
            new_file.close()
    finally:
        orig_file.close()
    os.rename(path + ".new", path)


for po_path in sys.argv[1:]:
    _rewrap_file(po_path)
|
|
|
|
|
|
|
|
# kate: space-indent off; indent-width 8; replace-tabs off;
|