You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
435 lines
14 KiB
435 lines
14 KiB
Import data from an XHTML file into a KexiDB table.
This script imports data from an XHTML file into a KexiDB table. The
destination table must already exist; the imported rows are added to it.
Sebastian Sauer <>
Dual-licensed under LGPL v2+higher and the BSD license.
class SaxInput:
    """Input source the data is imported from.

    Wraps the SAX XML parser: read() walks the XML file named by
    ``xmlfile`` and turns each row of the table it finds into a record
    that is handed to an output writer.
    """

    # Path of the XML file to read. Callers assign it before calling read().
    xmlfile = None

    def __init__(self):
        """Constructor.

        Raises:
            Exception: if the python xml.sax module cannot be imported.
        """
        # Only fail when the import really fails; the standard-library
        # xml.sax package replaces the PyXML-only saxlib/saxexts modules,
        # which do not exist for Python 3.
        try:
            import xml.sax
            import xml.sax.handler
        except ImportError as err:
            raise Exception(
                "Import of the python xml.sax module failed. This module "
                "is needed by the ImportXHTML python script."
            ) from err

    def read(self, outputwriter):
        """Parse the XML file and pass every table row to *outputwriter*.

        Args:
            outputwriter: object providing the ``Record`` and ``Field``
                factories and the ``begin()``, ``write(record)`` and
                ``end()`` methods (KexiDBOutput in this script).

        ``outputwriter.begin()`` is called before parsing starts and
        ``outputwriter.end()`` once parsing stopped, also when parsing or
        writing raised. Exceptions from the parser or the writer propagate.
        """
        import sys
        import xml.sax
        import xml.sax.handler

        class SaxHandler(xml.sax.handler.ContentHandler):
            """Event handler SAX calls while parsing the XML file."""

            # Element path under which the table with the data is expected;
            # the default is <html><body><table></table></body></html>.
            tablebase = ["html", "body", "table"]

            def __init__(self, inputreader, outputwriter):
                """Remember reader and writer and reset the parse state."""
                xml.sax.handler.ContentHandler.__init__(self)
                # The SaxInput instance that started the parsing.
                self.inputreader = inputreader
                # The writer that receives the finished records.
                self.outputwriter = outputwriter
                # Hierarchy level in the element tree we are in.
                self.level = 0
                # True while we are inside the element path `tablebase`.
                self.intable = False
                # Record being filled while inside a <tr>, else None.
                self.record = None
                # Field being filled while inside a <td>/<th>, else None.
                self.field = None

            def startDocument(self):
                sys.stdout.write('=> Starting parsing\n')

            def endDocument(self):
                sys.stdout.write('=> Fineshed parsing\n')

            def startElement(self, name, attrs):
                """Called by SAX when an element starts."""
                # While still on the way down to the table, every element
                # has to match the expected path at its level.
                if self.level < len(self.tablebase):
                    self.intable = self.tablebase[self.level] == name
                self.level += 1
                if not self.intable:
                    return

                # Print some debugging output to stdout.
                sys.stdout.write(' ' * self.level)
                sys.stdout.write('Element: %s' % name)
                for attr_name, attr_value in attrs.items():
                    sys.stdout.write(' %s="%s"' % (attr_name, attr_value))
                sys.stdout.write('\n')

                # Handle tr-, th- and td-tags inside the table.
                depth = len(self.tablebase)
                if name == "tr" and self.level == depth + 1:
                    self.record = self.outputwriter.Record()
                elif name in ("td", "th") and self.level == depth + 2:
                    self.field = self.outputwriter.Field()

            def endElement(self, name):
                """Called by SAX when an element ends."""
                self.level -= 1
                if self.record is None:
                    return

                # A record is open, so look for the end-tags that close
                # the record or one of its fields.
                depth = len(self.tablebase)
                if name == "tr" and self.level == depth:
                    # The row is complete: hand it to the writer before
                    # dropping it.
                    self.outputwriter.write(self.record)
                    self.record = None
                    self.field = None
                elif name == "td" and self.level == depth + 1:
                    self.record.setField(self.field)
                    self.field = None
                elif name == "th" and self.level == depth + 1:
                    self.record.setHeader(self.field)
                    self.field = None

            def characters(self, chars, offset=0, length=None):
                """Called by SAX with text content of an element.

                ``offset`` and ``length`` are optional so that both the
                one-argument SAX2 call and the older three-argument call
                are accepted.
                """
                if self.field is None:
                    return
                if length is None:
                    text = chars[offset:]
                else:
                    text = chars[offset:offset + length]
                # The XML data is unicode while the destination is treated
                # as latin-1 only: characters outside latin-1 are replaced,
                # and the result is kept as str so fields can be joined.
                text = str(text).encode("latin-1", "replace").decode("latin-1")
                self.field.append(text)

        # Create the handler for the parsing events and a SAX parser
        # connected with it.
        handler = SaxHandler(self, outputwriter)
        parser = xml.sax.make_parser()
        parser.setContentHandler(handler)

        # Start the job.
        outputwriter.begin()
        try:
            # Open the XML file, parse the content and close the file
            # again. Binary mode lets the parser detect the encoding.
            with open(self.xmlfile, 'rb') as xmlstream:
                parser.parse(xmlstream)
        finally:
            # Job is done (or aborted): let the writer clean up.
            outputwriter.end()
class KexiDBOutput:
    """Destination target the data is imported to.

    Provides abstract access to the KexiDB module: it keeps the connection
    of the running Kexi application, the field list of the destination
    table and the mapping from table field names to XML column numbers.
    """

    class Result:
        """Holds some information about the import result."""

        def __init__(self, outputwriter):
            self.outputwriter = outputwriter
            # Number of records successfully imported.
            self.successcount = 0
            # Number of records where the import failed.
            self.failedcount = 0

        def addLog(self, record, state):
            """Append a timestamped line for *record* to the writer's logfile.

            Does nothing when the writer has no open logfile.
            """
            import datetime
            logfile = getattr(self.outputwriter, "logfile", None)
            if logfile is None:
                return
            date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M.%S")
            logfile.write("%s (%s) %s\n" % (date, state, str(record)))

        def success(self, record):
            """Called if a record was written successfully."""
            print("SUCCESS: %s" % str(record))
            self.successcount += 1
            self.addLog(record, "Success")

        def failed(self, record):
            """Called if we failed to write a record."""
            print("FAILED: %s" % str(record))
            self.failedcount += 1
            self.addLog(record, "Failed")

    class Record:
        """A record in the dataset.

        The attribute ``isHeader`` only exists on records that received at
        least one header field; write() tests for its presence.
        """

        def __init__(self):
            self.fields = []

        def setHeader(self, headerfield):
            """Append a header field and mark the record as header."""
            self.fields.append(headerfield)
            self.isHeader = True

        def setField(self, field):
            """Append a data field."""
            self.fields.append(field)

        def __str__(self):
            # Same text as before: every field is followed by ", ".
            return "[" + "".join("%s, " % str(f) for f in self.fields) + "]"

    class Field:
        """A field in a record; collects the text chunks of one cell."""

        def __init__(self):
            self.content = []

        def append(self, content):
            self.content.append(content)

        def __str__(self):
            return "".join(self.content)

    def __init__(self):
        """Constructor.

        Raises:
            Exception: if Kexi has no connection, i.e. no project is open.
        """
        import kexiapp
        keximainwindow = kexiapp.get("KexiAppMainWindow")

        noconnection = "No connection established. Please open a project before."
        try:
            self.connection = keximainwindow.getConnection()
        except Exception as err:
            raise Exception(noconnection) from err
        # `==` is kept for values handed back by the Kexi binding.
        # NOTE(review): switch to `is None` once confirmed the binding
        # returns the real None object.
        if self.connection == None:
            raise Exception(noconnection)

        self.fieldlist = None
        self.headerrecord = None
        self.mapping = {}

    def begin(self):
        """Called before parsing starts.

        Raises:
            Exception: if no table was selected with setTable() before.
        """
        print("START JOB")
        if self.fieldlist is None:
            raise Exception("Invalid tableschema or fieldlist!")
        self.result = self.Result(self)
        # The mapping has to be confirmed and applied once per job.
        self._mappingapplied = False
        logfilename = getattr(self, "logfilename", None)
        if logfilename:
            self.logfile = open(logfilename, 'w')

    def end(self):
        """Called if parsing is finished; closes the logfile."""
        print("END JOB")
        logfile = getattr(self, "logfile", None)
        if logfile is not None:
            logfile.close()
        self.logfile = None
        self.mapping = {}

    def getTables(self):
        """Return a list of available tablenames."""
        return self.connection.tableNames()

    def setTable(self, tablename):
        """Set the tablename we like to import the data to.

        Raises:
            Exception: if the connection knows no table with that name.
        """
        tableschema = self.connection.tableSchema(tablename)
        if tableschema == None:
            raise Exception("There exists no table with the name '%s'!" % tablename)

        self.fieldlist = tableschema.fieldlist()
        for field in self.fieldlist.fields():
            print("KexiDBOutput.setTable(%s): %s(%s)" % (tablename, field.name(), field.type()))
        print("names=%s" % self.fieldlist.names())

    def setMapping(self, mapping):
        """Set the tablefieldname=xmlcolnr dictionary the data is mapped with."""
        self.mapping = mapping

    def setLogFile(self, logfilename):
        """Set the name of the logfile."""
        self.logfilename = logfilename

    def write(self, record):
        """Write the record to the KexiDB table.

        A header record is only remembered in ``headerrecord`` and not
        inserted. Before the first data record is inserted the optional
        ``onWrite`` hook is asked to confirm the import, and the field list
        is reduced to the mapped fields.

        Raises:
            RuntimeError: if the ``onWrite`` hook returned a false value.
            Exception: if inserting the record raised an error.
        """
        import sys

        if hasattr(record, "isHeader"):
            self.headerrecord = record
            return

        for f in record.fields:
            sys.stdout.write(' "%s"' % f)
        sys.stdout.write('\n')

        if not getattr(self, "_mappingapplied", False):
            if hasattr(self, "onWrite") and not self.onWrite(record):
                raise RuntimeError()
            self.fieldlist = self.fieldlist.subList(list(self.mapping))
            self._mappingapplied = True

        # Translate a KexiDBOutput.Record into a list of values, ordered
        # like the names of the field list.
        values = [
            str(record.fields[int(self.mapping[k])])
            for k in self.fieldlist.names()
        ]
        print("Import values: %s" % values)

        try:
            inserted = self.connection.insertRecord(self.fieldlist, values)
        except Exception as exc:
            err = self.connection.lastError()
            raise Exception("Failed to insert the record:\n%s\n\n%s" % (values, err)) from exc

        # A rejected record is counted (and logged) instead of aborting
        # the whole import.
        if inserted:
            self.result.success(record)
        else:
            self.result.failed(record)
# NOTE(review): this copy of the class has lost its indentation and, judging
# by the dangling statements below, several lines. The code is left exactly
# as found; the notes mark the visible gaps and must be confirmed against an
# intact copy before anything here is relied on.
class GuiApp:
""" The GUI-dialog displayed to let the user define the source
XML-file and the destination KexiDB table. """
# First dialog: asks for the source file, the destination table and the logfile.
class InitialDialog:
def __init__(self, guiapp):
self.guiapp = guiapp
self.ok = False
# `gui` is a helper module that is not part of this file.
import gui
self.dialog = gui.Dialog("Import XHTML")
self.dialog.addLabel(self.dialog, "Import data from a XHTML-file to a KexiDB table.\n"
"The destination table needs to be an existing table the data should be added to.")
self.importfile = self.dialog.addFileChooser(self.dialog,
"Source File:",
gui.getHome() + "/kexidata.xhtml",
(('XHTML files', '*.xhtml'),('All files', '*')))
# The list of destination tables comes from the output writer's getTables().
self.desttable = self.dialog.addList(self.dialog, "Destination Table:", self.guiapp.outputwriter.getTables())
#self.operation = self.dialog.addList(self.dialog, "Operation:", ("Insert","Update","Insert/Update"))
#self.error = self.dialog.addList(self.dialog, "On error:", ("Ask","Skip","Abort"))
# NOTE(review): the source-file chooser above passes a label, an initial path
# and the file filters; this call shows only a label and the filters, so an
# initial-path argument looks missing here. TODO confirm.
self.logfile = self.dialog.addFileChooser(self.dialog,
"Log File:",
(('Logfiles', '*.log'),('All files', '*')))
btnframe = self.dialog.addFrame(self.dialog)
self.dialog.addButton(btnframe, "Next", self.doNext)
self.dialog.addButton(btnframe, "Cancel", self.doCancel)
def doCancel(self):
""" Called if the Cancel-button was pressed. """
# Only the reference to the dialog is dropped; no close call is visible here.
self.dialog = None
def doNext(self):
""" Start to import the XML-file into the KexiDB table. """
# Hand the chosen source file, destination table and logfile over to the
# input reader and the output writer.
self.guiapp.inputreader.xmlfile = str(self.importfile.get())
self.guiapp.outputwriter.setTable( str(self.desttable.get()) )
self.guiapp.outputwriter.setLogFile( str(self.logfile.get()) )
# NOTE(review): truncated line - the callee in front of the argument is
# missing; presumably the input reader's read() is called with the output
# writer here. TODO confirm.
| self.guiapp.outputwriter )
# Report the counters collected in the output writer's result object.
msgbox = self.dialog.showMessageBox("info","Import done",
"Successfully imported records: %s\nFailed to import records: %s" % (self.guiapp.outputwriter.result.successcount, self.guiapp.outputwriter.result.failedcount) )
# NOTE(review): this `except` has neither a visible `try:` nor a body.
# KexiDBOutput.write raises RuntimeError when its onWrite hook returns a
# false value, which is presumably what gets caught here. TODO confirm.
except RuntimeError as e:
#except Exception, e:
# import traceback
# traceback.print_exc()
# msgbox = self.dialog.showMessageBox("error", "Error", e)
class MapperDialog:
""" The dialog that provides us a way to map
XHTML columns to the destination table. """
def __init__(self, outputwriter, record):
self.outputwriter = outputwriter
# Stays False unless doNext() ran; GuiApp.onWrite returns this flag.
self.ok = False
fieldlist = outputwriter.fieldlist
import gui
self.dlg = gui.Dialog("Import XHTML")
self.dlg.addLabel(self.dlg, "Define how the destination table should be mapped to the data from the XHTML file.")
# Choices offered for every table field: an empty entry first, then entries
# of the form "<column number>: <text>" for the columns of the record.
values = ["",]
for i in range(len(record.fields)):
# NOTE(review): both appends are visible without a condition between them;
# presumably they are alternatives (header text if a header record exists,
# else the record's own value in parentheses). TODO confirm.
values.append( "%s: %s" % (i,str(outputwriter.headerrecord.fields[i])) )
values.append( "%s: (%s)" % (i,str(record.fields[i])) )
# Pairs of (table field, list widget), read back in doNext().
self.items = []
i = 0
for field in fieldlist.fields():
f = self.dlg.addFrame(self.dlg)
# NOTE(review): the format operand after `%` is missing; presumably the
# field's name. TODO confirm.
l = self.dlg.addList(f, "%s:" %, values)
self.items.append( (field,l) )
# Describe the field: its type followed by the constraint flags that are set.
details = "%s:" % str( field.type() )
if field.isAutoInc(): details += "autoinc,"
if field.isUniqueKey(): details += "unique,"
if field.isNotNull(): details += "notnull,"
if field.isNotEmpty(): details += "notempty,"
# details[:-1] drops the trailing ',' (or the ':' when no flag is set).
self.dlg.addLabel(f, "(%s)" % details[:-1])
# NOTE(review): the following lines are incomplete - `except ValueError` has
# no visible `try:` and both `if` statements lack a visible body other than
# the counter increment. The intent cannot be told from here.
variable = str( record.fields[i] )
i += 1
if not field.isAutoInc():
except ValueError as e:
if not field.type() in ("Integer","BigInteger","ShortInteger","Float","Double"):
i += 1
btnframe = self.dlg.addFrame(self.dlg)
self.dlg.addButton(btnframe, "Next", self.doNext)
self.dlg.addButton(btnframe, "Cancel", self.dlg.close)
def doNext(self):
# Collect the chosen column number for every table field.
mapping = {}
for item in self.items:
(field,l) = item
# NOTE(review): the right-hand side is missing; presumably the field's name.
fieldname =
# The text in front of the first ':' is the column number; the empty entry
# yields no digits and is skipped by the isdigit() test.
colnr = str( l.get() ).split(":",1)[0]
if colnr.isdigit():
print("Table field '%s' is mapped to XML column '%s'" % (fieldname,colnr))
mapping[ fieldname ] = colnr
# NOTE(review): `mapping` is built but, in the visible lines, never handed to
# the output writer; a setMapping() call is presumably missing. TODO confirm.
self.ok = True
def __init__(self, inputreader, outputwriter):
""" Constructor. """
self.inputreader = inputreader
self.outputwriter = outputwriter
# Install the hook KexiDBOutput.write() asks before it imports a record.
self.outputwriter.onWrite = self.onWrite
def onWrite(self, record):
""" This method got called after the first record got
readed and before we start to import. """
# The import continues only if the mapper dialog ended with ok set.
return self.MapperDialog(self.outputwriter, record).ok
# Script entry: create the XML input reader and the KexiDB output writer
# and pass both to the GUI application.
GuiApp( SaxInput(), KexiDBOutput() )