tde-packaging/ubuntu/maverick/tdesdk/debian/desktop-i18n/msgsplit

#! /usr/bin/env python

import sys, string, codecs, os

# TODO: currently the 78 chars are *without* the quotes, while for Gettext it is *with* the quotes
# FIXME: it seems possible to get lines bigger than 80 characters.
# Line-length threshold used by splitit(); see the TODO/FIXME notes above:
# the opening and closing quotes are not counted.
max_length = 78

# HTML tags that force a line break: when one of these is found inside a
# message, a new output line is started right before it.
wrap_before = ['<h1>', '<h2>', '<h3>', '<h4>', '<h5>', '<h6>', '<p>', '<br>', '<br/>',
	'<ol>', '<ul>', '<li>', '<table>', '<th>', '<tr>', '<td>', '<center>',
	'<blockquote>', '<pre>', '<hr>', '<hr/>']

### TODO: try to support any charset, not only UTF-8 (so that it can be used outside TDE)

def _backslashes_before(text, pos):
	"""Return how many consecutive backslashes directly precede text[pos]."""
	count = 0
	while count < pos and text[pos - 1 - count] == '\\':
		count += 1
	return count

def _emit(outfile, text):
	"""Write text to outfile as UTF-8 encoded bytes."""
	outfile.write(text.encode('utf-8'))

def splitit(start, message, outfile):
	"""Write one PO entry field (keyword plus quoted string) to outfile.

	start   -- keyword prefix including its trailing space, e.g. 'msgid ' or
	           'msgstr[0] '.  When it is empty nothing is written at all.
	message -- the string content without the surrounding quotes, still in
	           its PO-escaped form (a line break is the two characters
	           backslash + 'n').
	outfile -- object whose write() accepts UTF-8 encoded bytes.

	A message that is short enough and contains no '\\n' sequence is written
	on a single line.  Otherwise the keyword is followed by an empty string
	("") and the message is spread over several quoted lines, broken
	  * after every escaped newline,
	  * before every tag listed in wrap_before,
	  * and, once a line grows beyond max_length, after the last '>' (only
	    if it lies past column 50), else after the last space, else after
	    the last comma, else at the current position.
	"""
	if not start:
		return

	# Conservative check: any backslash + 'n' pair forces the multi-line form.
	if len(message) + len(start) < max_length and '\\n' not in message:
		_emit(outfile, '%s"%s"\n' % (start, message))
		return

	_emit(outfile, '%s""\n' % start)

	tags = tuple(wrap_before)
	index = 0
	# Positions of the most recent break candidates on the current line;
	# 0 doubles as "none seen yet".
	last_brace = 0
	last_space = 0
	last_comma = 0
	while index < len(message):
		char = message[index]
		cut = None  # number of leading characters to flush as one line
		if char == 'n' and _backslashes_before(message, index) % 2 == 1:
			# An odd run of backslashes means the last one escapes this 'n'
			# (a real newline); an even run is only escaped backslashes.
			cut = index + 1
		elif char == '>':
			last_brace = index
		elif char == ' ':
			last_space = index
		elif char == ',':
			last_comma = index
		elif char == '<' and index > 0 and message.startswith(tags, index):
			# Break *before* the tag; the tag opens the next line.
			cut = index

		if cut is None and index > max_length:
			if last_brace > 50:
				index = last_brace
				# Keep the spaces that follow the '>' on the same line.
				while index < len(message) - 1 and message[index + 1] == ' ':
					index += 1
			elif last_space != 0:
				index = last_space
			elif last_comma != 0:
				index = last_comma
			else:
				# No natural break point: at least do not cut right after
				# a backslash, which would split an escape sequence.
				while index > 0 and message[index] == '\\':
					index -= 1
			cut = index + 1

		if cut is not None:
			_emit(outfile, '"%s"\n' % message[:cut])
			message = message[cut:]
			index = 0
			last_brace = 0
			last_space = 0
			last_comma = 0
			continue
		index += 1

	if message:
		_emit(outfile, '"%s"\n' % message)

### TODO: even in the case of a parse error, the script could try to process the next file(s) instead of exiting.

# Keywords that are followed directly by the quoted string.
_SIMPLE_KEYWORDS = ("msgid ", "msgstr ", "msgctxt ", "msgid_plural ")

def _read_lines(path):
	"""Return the lines of the file at path as byte strings.

	The file is read in binary mode and every '\\r\\n' or lone '\\r' is turned
	into '\\n' by hand, which gives the same line endings as the former "rU"
	(universal newlines) open mode without depending on that mode.
	"""
	handle = open(path, 'rb')
	try:
		data = handle.read()
	finally:
		handle.close()
	data = data.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
	return data.splitlines(True)

def _parse_keyword_line(line):
	"""Split a stripped PO keyword line into (start, text).

	start is the keyword prefix including its trailing space (for example
	'msgid ' or 'msgstr[2] '); text is the content between the quotes.
	Raises ValueError when the line is not a recognised keyword line; the
	exception message is the phrase to print between file name and line
	number.
	"""
	start = None
	for keyword in _SIMPLE_KEYWORDS:
		if line.startswith(keyword):
			start = keyword
			break
	if start is None:
		if not line.startswith("msgstr["):
			raise ValueError("parsing error in line")
		# Slices instead of indexing: a truncated line must give a parse
		# error, not an IndexError.
		if line[8:10] == "] ":
			# For most languages, there will be only one digit
			if not line[7].isdigit():
				raise ValueError("not-a-digit error for mgstr[] in line")
			start = line[:10]
		else:
			posdigit = 7 # The first digit is at position 7
			while posdigit < len(line) and line[posdigit].isdigit():
				posdigit += 1
			if posdigit == 7 or line[posdigit:posdigit + 2] != "] ":
				raise ValueError("parse error after msgstr[ in line")
			start = line[:posdigit + 2] # include ] and the space
	value = line[len(start):].lstrip()
	# The value must be a quoted string; slicing it blindly would silently
	# corrupt the rewritten file.
	if len(value) < 2 or value[0] != '"' or value[-1] != '"':
		raise ValueError("parsing error in line")
	return start, value[1:-1]

def _process_file(path):
	"""Re-wrap every message of the PO file at path, replacing it in place.

	The result is first written to path + ".new" and renamed over the
	original only when the whole file was processed.  On a parse or decoding
	error a message "<file> <problem> <line number>" is printed and the
	script exits with status 1, leaving the original file untouched.
	"""
	lines = _read_lines(path)
	new_path = path + ".new"
	# Binary mode: splitit() writes UTF-8 encoded data.
	new_file = open(new_path, 'wb')
	try:
		last = ''
		start = ''
		for lineno, raw in enumerate(lines, 1):
			text = None
			try:
				# Comment lines are copied verbatim and never decoded.
				if raw[:1] != b'#':
					text = raw.decode('utf-8').strip()
				if text:
					if text[0] == '"' and text[-1:] == '"':
						# Continuation line of the current message.
						last += text[1:-1]
						continue
					# new message: flush the previous one first
					splitit(start, last, new_file)
					start, last = _parse_keyword_line(text)
					continue
			except UnicodeError:
				print("%s invalid UTF-8 in line %d" % (path, lineno))
				sys.exit(1)
			except ValueError as err:
				print("%s %s %d" % (path, err, lineno))
				sys.exit(1)
			# Comment or blank (whitespace-only) line: it ends the pending
			# message and is copied through unchanged.
			splitit(start, last, new_file)
			start = ''
			last = ''
			new_file.write(raw)
		splitit(start, last, new_file)
	finally:
		new_file.close()
	os.rename(new_path, path)

def main(argv):
	"""Process every file named in argv[1:]."""
	for path in argv[1:]:
		_process_file(path)

if __name__ == "__main__":
	main(sys.argv)

# kate:  space-indent off; indent-width 8; replace-tabs off;
Add initial Debian Lenny/Squeeze packaging as well as symlink Lucid to Maverick 13 years ago			`#! /usr/bin/env python`

			`import sys, string, codecs, os`

			`# TODO: currently the 78 chars are without the quotes, while for Gettext it is with the quotes`
			`# FIXME: it seems possible to get lines bigger than 80 characters.`
			`max_length = 78`

			`wrap_before = ['<h1>', '<h2>', '<h3>', '<h4>', '<h5>', '<h6>', '<p>', '<br>', '<br/>',`
			`'<ol>', '<ul>', '<li>', '<table>', '<th>', '<tr>', '<td>', '<center>',`
			`'<blockquote>', '<pre>', '<hr>', '<hr/>']`

Branding cleanup on Debian and Ubuntu 11 years ago			`### TODO: try to support any charset, not only UTF-8 (so that it can be used outside TDE)`
Add initial Debian Lenny/Squeeze packaging as well as symlink Lucid to Maverick 13 years ago
			`def splitit( start, message, outfile ):`
			`# print start+"\""+message+"\"" # DEBUG`
			`if len(start):`
			`if len(message) + len(start) < max_length and \`
			`string.find(message, '\\n') == -1:`
			`outstr = '%s"%s"\n' % (start, message)`
			`outfile.write(outstr.encode('utf-8'))`
			`return`
			`outfile.write(start)`
			`outfile.write(u'""\n')`
			`index = 0`
			`mlen = len(message)`
			`last_brace = 0`
			`last_space = 0`
			`last_comma = 0`
			`while index < mlen:`
			`if message[index] == r'n' and (index > 0 and message[index-1] == '\\') \`
			`and (index < 2 or message[index-2] != '\\'):`
			`outstr = '"%s"\n' % message[:index+1]`
			`outfile.write(outstr.encode('utf-8'))`
			`message = message[index+1:]`
			`mlen -= index + 1`
			`index = 0`
			`last_brace = 0`
			`last_space = 0`
			`last_comma = 0`
			`continue`
			`elif message[index] == u'>':`
			`last_brace = index`
			`elif message[index] == u' ':`
			`last_space = index`
			`elif message[index] == u',':`
			`last_comma = index`
			`elif message[index] == u'<':`
			`for s in wrap_before:`
			`if index > 0 and message[index:].startswith(s):`
			`outstr = '"%s"\n' % message[:index]`
			`outfile.write(outstr.encode('utf-8'))`
			`message = message[index:]`
			`mlen -= index`
			`index = 0`
			`last_brace = 0`
			`last_space = 0`
			`last_comma = 0`
			`continue`
			`if index > max_length:`
			`if last_brace > 50:`
			`index = last_brace`
			`while index < mlen - 1 and message[index+1] == ' ':`
			`index += 1`
			`elif last_space != 0:`
			`index = last_space`
			`elif last_comma != 0:`
			`index = last_comma`
			`else:`
			`while index > 0 and message[index] == u'\\':`
			`index = index - 1`
			`outstr = '"%s"\n' % message[:index+1]`
			`outfile.write(outstr.encode('utf-8'))`
			`message = message[index+1:]`
			`mlen -= index + 1`
			`index = 0`
			`last_brace = 0`
			`last_space = 0`
			`last_comma = 0`
			`continue`
			`index += 1`
			`if len(message):`
			`outstr = '"%s"\n' % message`
			`outfile.write(outstr.encode('utf-8'))`

			`if sys.hexversion >= 0x02030000:`
			`# We have Python 2.3 or better`
			`open_type="rU" # Open for read with "Universal Newline Support"`
			`else:`
			`# We have a Python older than 2.3`
			`open_type="r" # Normal open for read`
			`### TODO: even in the case of a parse error, the script could try to process the next file(s) instead of exiting.`
			`for file in sys.argv[1:]:`
			`orig_file = open(file, open_type)`
			`new_file = open(file + ".new", 'w')`

			`last=''`
			`start=''`
			`index=0`
			`line=' '`
			`while 1: # python 2.1 has no True ;)`
			`line = orig_file.readline()`
			`index += 1`
			`if not line:`
			`break`
			`if line == '\n' or line[0] == '#':`
			`splitit(start, last, new_file)`
			`start = ''`
			`last = ''`
			`new_file.write(line)`
			`continue`
			`try:`
			`line = string.strip(unicode(line, 'utf-8'))`
			`except UnicodeError:`
			`print file`
			`if line[0] == '"' and line[-1:] == '"':`
			`last += line[1:-1]`
			`continue`
			`# new message`
			`splitit(start, last, new_file)`
			`if line.startswith("msgid "):`
			`start = "msgid "`
			`last = string.lstrip(line[6:-1])[1:]`
			`elif line.startswith("msgstr "):`
			`start = "msgstr "`
			`last = string.lstrip(line[7:-1])[1:]`
			`elif line.startswith("msgctxt "):`
			`start = "msgctxt "`
			`last = string.lstrip(line[8:-1])[1:]`
			`elif line.startswith("msgid_plural "):`
			`start = "msgid_plural "`
			`last = string.lstrip(line[13:-1])[1:]`
			`elif line.startswith("msgstr["):`
			`# For most languages, there will be only one digit`
			`if line[8] == "]" and line[9] == " ":`
			`if line[7].isdigit():`
			`start = line[:10]`
			`last = string.lstrip(line[10:-1])[1:]`
			`else:`
			`print file, "not-a-digit error for mgstr[] in line", index`
			`orig_file.close()`
			`new_file.close()`
			`sys.exit(1)`
			`else:`
			`posdigit = 7 # The first digit is at position 7`
			`while line[posdigit].isdigit():`
			`posdigit += 1`
			`if posdigit > 7 and line[posdigit] == "]" and line[posdigit+1] == " ":`
			`posdigit += 2 # skip ] and the space`
			`start = line[:posdigit]`
			`last = string.lstrip(line[posdigit:-1])[1:]`
			`else:`
			`print file, "parse error after msgstr[ in line", index`
			`orig_file.close()`
			`new_file.close()`
			`sys.exit(1)`
			`else:`
			`print file, "parsing error in line", index`
			`orig_file.close()`
			`new_file.close()`
			`sys.exit(1)`

			`splitit(start, last, new_file)`
			`orig_file.close()`
			`new_file.close()`
			`os.rename(file + ".new", file)`

			`# kate: space-indent off; indent-width 8; replace-tabs off;`