You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tde-packaging/ubuntu/maverick/tdelibs/debian/kubuntu-desktop-i18n/msgsplit

169 lines
4.8 KiB

#! /usr/bin/env python
import sys, string, codecs, os
# TODO: currently the 78 chars are *without* the quotes, while for Gettext it is *with* the quotes
# FIXME: it seems possible to get lines bigger than 80 characters.
# Length threshold used by splitit(): a message is kept on one line only when
# keyword + text is shorter than this, and longer text is cut once the scan
# position goes past it.
max_length = 78

# Tags in front of which splitit() starts a new output line.
wrap_before = [
    # headings
    '<h1>', '<h2>', '<h3>', '<h4>', '<h5>', '<h6>',
    # paragraphs and line breaks
    '<p>', '<br>', '<br/>',
    # lists
    '<ol>', '<ul>', '<li>',
    # tables
    '<table>', '<th>', '<tr>', '<td>',
    # other block-level markup
    '<center>', '<blockquote>', '<pre>', '<hr>', '<hr/>',
]
### TODO: try to support any charset, not only UTF-8 (so that it can be used outside KDE)
def _write_utf8(outfile, text):
    """Write *text* to *outfile* encoded as UTF-8."""
    outfile.write(text.encode('utf-8'))


def splitit( start, message, outfile, width=None, tags=None ):
    """Write one PO entry field to *outfile*, wrapped over several quoted lines.

    start   -- keyword prefix including its trailing space (e.g. 'msgid ');
               an empty string means no keyword line is written.
               NOTE(review): callers in this file only pass ASCII keywords.
    message -- the message text without its surrounding quotes, escape
               sequences still in their escaped form.
    outfile -- object with a write() method; it receives UTF-8 encoded data.
    width   -- wrapping threshold; defaults to the module-level max_length.
    tags    -- tags in front of which a new line is started; defaults to the
               module-level wrap_before.

    If keyword plus message is shorter than *width* and the message contains
    no escaped newline, a single line `start"message"` is written.  Otherwise
    `start""` is written first and the message follows in quoted pieces.
    A piece ends:
      * right after an escaped newline (backslash + 'n' whose backslash is
        not itself preceded by a backslash);
      * right before any tag from *tags* that is not at the start of the
        remaining text;
      * once the scan position exceeds *width*: after the last '>' if that
        lies beyond position 50 (trailing spaces are kept with it), else
        after the last space, else after the last comma, else at the current
        position (moved back while it points at a backslash).

    Known limitation (see the FIXME at the top of the file): the threshold is
    counted without the quotes, so written lines can exceed 80 characters.
    """
    if width is None:
        width = max_length
    if tags is None:
        tags = wrap_before

    if start:
        if len(message) + len(start) < width and message.find('\\n') == -1:
            _write_utf8(outfile, '%s"%s"\n' % (start, message))
            return
        # Multi-line form: the keyword line carries an empty string.
        _write_utf8(outfile, '%s""\n' % start)

    index = 0
    mlen = len(message)
    last_brace = last_space = last_comma = 0
    while index < mlen:
        char = message[index]
        # When set (> 0), message[:cut] is written as one piece and scanning
        # restarts at the beginning of the remaining text.
        cut = 0
        if char == 'n' and index > 0 and message[index-1] == '\\' \
           and (index < 2 or message[index-2] != '\\'):
            cut = index + 1
        elif char == '>':
            last_brace = index
        elif char == ' ':
            last_space = index
        elif char == ',':
            last_comma = index
        elif char == '<' and index > 0:
            for tag in tags:
                if message.startswith(tag, index):
                    # Break in front of the tag; the tag itself stays at the
                    # start of the remaining text and scanning moves past
                    # its '<' below.
                    _write_utf8(outfile, '"%s"\n' % message[:index])
                    message = message[index:]
                    mlen -= index
                    index = 0
                    last_brace = last_space = last_comma = 0
                    break

        if cut == 0 and index > width:
            if last_brace > 50:
                index = last_brace
                # Keep spaces that follow the '>' on the same piece.
                while index < mlen - 1 and message[index+1] == ' ':
                    index += 1
            elif last_space != 0:
                index = last_space
            elif last_comma != 0:
                index = last_comma
            else:
                # No preferred break point: move back while pointing at a
                # backslash so the piece does not end on one.
                while index > 0 and message[index] == '\\':
                    index = index - 1
            cut = index + 1

        if cut:
            _write_utf8(outfile, '"%s"\n' % message[:cut])
            message = message[cut:]
            mlen -= cut
            index = 0
            last_brace = last_space = last_comma = 0
            continue
        index += 1

    if message:
        _write_utf8(outfile, '"%s"\n' % message)
if sys.hexversion >= 0x02030000:
    # We have Python 2.3 or better
    open_type = "rU" # Open for read with "Universal Newline Support"
else:
    # We have a Python older than 2.3
    open_type = "r" # Normal open for read

# Keywords whose message text follows directly on the same line.
_simple_keywords = ("msgid ", "msgstr ", "msgctxt ", "msgid_plural ")


def _abort(filename, problem, lineno, orig_file, new_file):
    """Print '<filename> <problem> <lineno>', close both files and exit with status 1."""
    sys.stdout.write("%s %s %d\n" % (filename, problem, lineno))
    orig_file.close()
    new_file.close()
    sys.exit(1)


### TODO: even in the case of a parse error, the script could try to process the next file(s) instead of exiting.
# Each file named on the command line is rewritten in place: entries are
# re-wrapped by splitit() into "<file>.new", which then replaces the original.
for filename in sys.argv[1:]:
    orig_file = open(filename, open_type)
    new_file = open(filename + ".new", 'w')
    last = ''    # message text collected so far for the current keyword
    start = ''   # keyword prefix of the current message ('' = none pending)
    index = 0    # 1-based number of the line being processed
    while 1: # python 2.1 has no True ;)
        raw = orig_file.readline()
        index += 1
        if not raw:
            break

        # Comment lines are copied through without being decoded; everything
        # else must be valid UTF-8.
        line = None
        if raw[0] != '#':
            try:
                line = unicode(raw, 'utf-8').strip()
            except UnicodeError:
                # Going on with the raw line would write a corrupt entry and
                # fail later anyway, so stop here with a clear message.
                _abort(filename, "invalid UTF-8 in line", index, orig_file, new_file)

        if not line:
            # Comment or blank (including whitespace-only) line: it ends the
            # pending message and is copied unchanged.
            splitit(start, last, new_file)
            start = ''
            last = ''
            new_file.write(raw)
            continue

        if line[0] == '"' and line[-1:] == '"':
            # Continuation line: append its content to the current message.
            last += line[1:-1]
            continue

        # new message
        splitit(start, last, new_file)
        if line.startswith("msgstr["):
            # For most languages, there will be only one digit
            if line[8:10] == "] ":
                if line[7].isdigit():
                    start = line[:10]
                    last = line[10:-1].lstrip()[1:]
                else:
                    _abort(filename, "not-a-digit error for mgstr[] in line", index, orig_file, new_file)
            else:
                posdigit = 7 # The first digit is at position 7
                while line[posdigit:posdigit+1].isdigit():
                    posdigit += 1
                if posdigit > 7 and line[posdigit:posdigit+2] == "] ":
                    posdigit += 2 # skip ] and the space
                    start = line[:posdigit]
                    last = line[posdigit:-1].lstrip()[1:]
                else:
                    _abort(filename, "parse error after msgstr[ in line", index, orig_file, new_file)
        else:
            for keyword in _simple_keywords:
                if line.startswith(keyword):
                    start = keyword
                    # Drop the closing quote ([:-1]), optional extra
                    # whitespace, then the opening quote ([1:]).
                    last = line[len(keyword):-1].lstrip()[1:]
                    break
            else:
                # No known keyword matched.
                _abort(filename, "parsing error in line", index, orig_file, new_file)

    # Flush the last pending message, then replace the original file.
    splitit(start, last, new_file)
    orig_file.close()
    new_file.close()
    os.rename(filename + ".new", filename)
# kate: space-indent off; indent-width 8; replace-tabs off;