|
|
|
/***************************************************************************
   pseudoDtd.cpp
   copyright : (C) 2001-2002 by Daniel Naber
   email : daniel.naber@t-online.de
 ***************************************************************************/
|
|
|
|
|
|
|
|
/***************************************************************************
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or ( at your option ) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 ***************************************************************************/
|
|
|
|
|
|
|
|
#include "pseudo_dtd.h"
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
#include <tqdom.h>
|
|
|
|
#include <tqregexp.h>
|
|
|
|
|
|
|
|
#include <klocale.h>
|
|
|
|
#include <kmessagebox.h>
|
|
|
|
|
|
|
|
/**
 * Construct a PseudoDTD with "SGML support" switched on.
 */
PseudoDTD::PseudoDTD()
{
  // "SGML support" only means case-insensitivity, because HTML is
  // case-insensitive up to version 4.
  // TODO: make this a run-time option ( maybe automatically set )
  m_sgmlSupport = true;
}
|
|
|
|
|
|
|
|
/**
 * Destructor. No explicit cleanup is performed here.
 */
PseudoDTD::~PseudoDTD()
{
}
|
|
|
|
|
|
|
|
/**
 * Load a meta DTD (XML as produced by dtdparse) and rebuild the entity,
 * element, attribute and attribute value tables from it.
 *
 * An error box is shown and nothing is parsed if @p metaDtd is not
 * well-formed XML or if its doctype name is not "dtd". The function also
 * stops as soon as one of the parse steps returns false.
 *
 * @param metaDtdUrl  name of the meta DTD, shown in the error messages
 * @param metaDtd     the content of the meta DTD
 */
void PseudoDTD::analyzeDTD( TQString &metaDtdUrl, TQString &metaDtd )
{
  TQDomDocument doc( "dtdIn_xml" );

  const bool parsed = doc.setContent( metaDtd );
  if ( !parsed )
  {
    const TQString message = i18n("The file '%1' could not be parsed. "
        "Please check that the file is well-formed XML.").arg( metaDtdUrl );
    KMessageBox::error( 0, message, i18n("XML Plugin Error") );
    return;
  }

  const TQString doctypeName = doc.doctype().name();
  if ( doctypeName != "dtd" )
  {
    const TQString message = i18n("The file '%1' is not in the expected format. "
        "Please check that the file is of this type:\n"
        "-//Norman Walsh//DTD DTDParse V2.0//EN\n"
        "You can produce such files with dtdparse. "
        "See the Kate Plugin documentation for more information.").arg( metaDtdUrl );
    KMessageBox::error( 0, message, i18n("XML Plugin Error") );
    return;
  }

  // One progress step per node visited by the parse functions below.
  const uint entityCount = doc.elementsByTagName( "entity" ).count();
  const uint elementCount = doc.elementsByTagName( "element" ).count();
  const uint attlistCount = doc.elementsByTagName( "attlist" ).count();
  // attlists count twice, as they will be iterated twice ( TODO: optimize that? ):
  const uint totalSteps = entityCount + elementCount + attlistCount * 2;

  TQProgressDialog progress( i18n("Analyzing meta DTD..."), i18n("Cancel"), totalSteps,
      0, "progress", TRUE );
  progress.setMinimumDuration( 400 );
  progress.setProgress(0);

  // Get information from meta DTD and put it in TQt data structures for fast
  // access. The && chain stops at the first step that returns false.
  const bool completed = parseEntities( &doc, &progress )
      && parseElements( &doc, &progress )
      && parseAttributes( &doc, &progress )
      && parseAttributeValues( &doc, &progress );
  if ( !completed )
    return;

  progress.setProgress( totalSteps ); // just to make sure the dialog disappears
}
|
|
|
|
|
|
|
|
// ========================================================================
|
|
|
|
// DOM stuff:
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Iterate through the XML to get a mapping which sub-elements are allowed for
|
|
|
|
* all elements.
|
|
|
|
*/
|
|
|
|
bool PseudoDTD::parseElements( TQDomDocument *doc, TQProgressDialog *progress )
|
|
|
|
{
|
|
|
|
|
|
|
|
m_elementsList.clear();
|
|
|
|
// We only display a list, i.e. we pretend that the content model is just
|
|
|
|
// a set, so we use a map. This is necessay e.g. for xhtml 1.0's head element,
|
|
|
|
// which would otherwise display some elements twice.
|
|
|
|
TQMap<TQString,bool> subelementList; // the bool is not used
|
|
|
|
|
|
|
|
TQDomNodeList list = doc->elementsByTagName( "element" );
|
|
|
|
uint listLength = list.count(); // speedup (really! )
|
|
|
|
|
|
|
|
for( uint i = 0; i < listLength; i++ )
|
|
|
|
{
|
|
|
|
if( progress->wasCancelled() )
|
|
|
|
return false;
|
|
|
|
|
|
|
|
progress->setProgress( progress->progress()+1 );
|
|
|
|
// FIXME!:
|
|
|
|
//tqApp->processEvents();
|
|
|
|
|
|
|
|
subelementList.clear();
|
|
|
|
TQDomNode node = list.item( i );
|
|
|
|
TQDomElement elem = node.toElement();
|
|
|
|
|
|
|
|
if( !elem.isNull() )
|
|
|
|
{
|
|
|
|
// Enter the expanded content model, which may also include stuff not allowed.
|
|
|
|
// We do not care if it's a <sequence-group> or whatever.
|
|
|
|
TQDomNodeList contentModelList = elem.elementsByTagName( "content-model-expanded" );
|
|
|
|
TQDomNode contentModelNode = contentModelList.item(0);
|
|
|
|
TQDomElement contentModelElem = contentModelNode.toElement();
|
|
|
|
if( ! contentModelElem.isNull() )
|
|
|
|
{
|
|
|
|
// check for <pcdata/>:
|
|
|
|
TQDomNodeList pcdataList = contentModelElem.elementsByTagName( "pcdata" );
|
|
|
|
|
|
|
|
// check for other sub elements:
|
|
|
|
TQDomNodeList subList = contentModelElem.elementsByTagName( "element-name" );
|
|
|
|
uint subListLength = subList.count();
|
|
|
|
for( uint l = 0; l < subListLength; l++ )
|
|
|
|
{
|
|
|
|
TQDomNode subNode = subList.item(l);
|
|
|
|
TQDomElement subElem = subNode.toElement();
|
|
|
|
if( !subElem.isNull() )
|
|
|
|
subelementList[subElem.attribute( "name" )] = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// anders: check if this is an EMPTY element, and put "__EMPTY" in the
|
|
|
|
// sub list, so that we can insert tags in empty form if required.
|
|
|
|
TQDomNodeList emptyList = elem.elementsByTagName( "empty" );
|
|
|
|
if ( emptyList.count() )
|
|
|
|
subelementList["__EMPTY"] = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Now remove the elements not allowed (e.g. <a> is explicitely not allowed in <a>
|
|
|
|
// in the HTML 4.01 Strict DTD):
|
|
|
|
TQDomNodeList exclusionsList = elem.elementsByTagName( "exclusions" );
|
|
|
|
if( exclusionsList.length() > 0 )
|
|
|
|
{ // sometimes there are no exclusions ( e.g. in XML DTDs there are never exclusions )
|
|
|
|
TQDomNode exclusionsNode = exclusionsList.item(0);
|
|
|
|
TQDomElement exclusionsElem = exclusionsNode.toElement();
|
|
|
|
if( ! exclusionsElem.isNull() )
|
|
|
|
{
|
|
|
|
TQDomNodeList subList = exclusionsElem.elementsByTagName( "element-name" );
|
|
|
|
uint subListLength = subList.count();
|
|
|
|
for( uint l = 0; l < subListLength; l++ )
|
|
|
|
{
|
|
|
|
TQDomNode subNode = subList.item(l);
|
|
|
|
TQDomElement subElem = subNode.toElement();
|
|
|
|
if( !subElem.isNull() )
|
|
|
|
{
|
|
|
|
TQMap<TQString,bool>::Iterator it = subelementList.find( subElem.attribute( "name" ) );
|
|
|
|
if( it != subelementList.end() )
|
|
|
|
subelementList.remove(it);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// turn the map into a list:
|
|
|
|
TQStringList subelementListTmp;
|
|
|
|
TQMap<TQString,bool>::Iterator it;
|
|
|
|
for( it = subelementList.begin(); it != subelementList.end(); ++it )
|
|
|
|
subelementListTmp.append( it.key() );
|
|
|
|
|
|
|
|
m_elementsList.insert( elem.attribute( "name" ), subelementListTmp );
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} // end iteration over all <element> nodes
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Check which elements are allowed inside a parent element. This returns
|
|
|
|
* a list of allowed elements, but it doesn't care about order or if only a certain
|
|
|
|
* number of occurences is allowed.
|
|
|
|
*/
|
|
|
|
TQStringList PseudoDTD::allowedElements( TQString parentElement )
|
|
|
|
{
|
|
|
|
if( m_sgmlSupport )
|
|
|
|
{
|
|
|
|
// find the matching element, ignoring case:
|
|
|
|
TQMap<TQString,TQStringList>::Iterator it;
|
|
|
|
for( it = m_elementsList.begin(); it != m_elementsList.end(); ++it )
|
|
|
|
{
|
|
|
|
if( it.key().lower() == parentElement.lower() )
|
|
|
|
return it.data();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if( m_elementsList.contains(parentElement) )
|
|
|
|
return m_elementsList[parentElement];
|
|
|
|
|
|
|
|
return TQStringList();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Iterate through the XML to get a mapping which attributes are allowed inside
|
|
|
|
* all elements.
|
|
|
|
*/
|
|
|
|
bool PseudoDTD::parseAttributes( TQDomDocument *doc, TQProgressDialog *progress )
|
|
|
|
{
|
|
|
|
m_attributesList.clear();
|
|
|
|
// TQStringList allowedAttributes;
|
|
|
|
TQDomNodeList list = doc->elementsByTagName( "attlist" );
|
|
|
|
uint listLength = list.count();
|
|
|
|
|
|
|
|
for( uint i = 0; i < listLength; i++ )
|
|
|
|
{
|
|
|
|
if( progress->wasCancelled() )
|
|
|
|
return false;
|
|
|
|
|
|
|
|
progress->setProgress( progress->progress()+1 );
|
|
|
|
// FIXME!!
|
|
|
|
//tqApp->processEvents();
|
|
|
|
|
|
|
|
ElementAttributes attrs;
|
|
|
|
TQDomNode node = list.item(i);
|
|
|
|
TQDomElement elem = node.toElement();
|
|
|
|
if( !elem.isNull() )
|
|
|
|
{
|
|
|
|
TQDomNodeList attributeList = elem.elementsByTagName( "attribute" );
|
|
|
|
uint attributeListLength = attributeList.count();
|
|
|
|
for( uint l = 0; l < attributeListLength; l++ )
|
|
|
|
{
|
|
|
|
TQDomNode attributeNode = attributeList.item(l);
|
|
|
|
TQDomElement attributeElem = attributeNode.toElement();
|
|
|
|
|
|
|
|
if( ! attributeElem.isNull() )
|
|
|
|
{
|
|
|
|
if ( attributeElem.attribute("type") == "#REQUIRED" )
|
|
|
|
attrs.requiredAttributes.append( attributeElem.attribute("name") );
|
|
|
|
else
|
|
|
|
attrs.optionalAttributes.append( attributeElem.attribute("name") );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
m_attributesList.insert( elem.attribute("name"), attrs );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Check which attributes are allowed for an element.
|
|
|
|
*/
|
|
|
|
TQStringList PseudoDTD::allowedAttributes( TQString element )
|
|
|
|
{
|
|
|
|
if( m_sgmlSupport )
|
|
|
|
{
|
|
|
|
// find the matching element, ignoring case:
|
|
|
|
TQMap<TQString,ElementAttributes>::Iterator it;
|
|
|
|
for( it = m_attributesList.begin(); it != m_attributesList.end(); ++it ) {
|
|
|
|
if( it.key().lower() == element.lower() ) {
|
|
|
|
return it.data().optionalAttributes + it.data().requiredAttributes;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if( m_attributesList.contains(element) )
|
|
|
|
return m_attributesList[element].optionalAttributes + m_attributesList[element].requiredAttributes;
|
|
|
|
|
|
|
|
return TQStringList();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Check which attributes are required for an element.
 *
 * @param element  name of the element; compared ignoring case when SGML
 *                 support is enabled
 * @return the element's required attributes, or an empty list if the element
 *         is not known
 */
TQStringList PseudoDTD::requiredAttributes( const TQString &element ) const
{
  if ( ! m_sgmlSupport )
  {
    // case-sensitive: a single direct lookup is enough
    TQMap<TQString,ElementAttributes>::ConstIterator found = m_attributesList.find( element );
    if( found != m_attributesList.end() )
      return found.data().requiredAttributes;
    return TQStringList();
  }

  // find the matching element, ignoring case; lower-case the name only once:
  const TQString wanted = element.lower();
  TQMap<TQString,ElementAttributes>::ConstIterator it;
  for( it = m_attributesList.begin(); it != m_attributesList.end(); ++it )
  {
    if( it.key().lower() == wanted )
      return it.data().requiredAttributes;
  }

  return TQStringList();
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Iterate through the XML to get a mapping which attribute values are allowed
|
|
|
|
* for all attributes inside all elements.
|
|
|
|
*/
|
|
|
|
bool PseudoDTD::parseAttributeValues( TQDomDocument *doc, TQProgressDialog *progress )
|
|
|
|
{
|
|
|
|
m_attributevaluesList.clear(); // 1 element : n possible attributes
|
|
|
|
TQMap<TQString,TQStringList> attributevaluesTmp; // 1 attribute : n possible values
|
|
|
|
TQDomNodeList list = doc->elementsByTagName( "attlist" );
|
|
|
|
uint listLength = list.count();
|
|
|
|
|
|
|
|
for( uint i = 0; i < listLength; i++ )
|
|
|
|
{
|
|
|
|
if( progress->wasCancelled() )
|
|
|
|
return false;
|
|
|
|
|
|
|
|
progress->setProgress( progress->progress()+1 );
|
|
|
|
// FIXME!
|
|
|
|
//tqApp->processEvents();
|
|
|
|
|
|
|
|
attributevaluesTmp.clear();
|
|
|
|
TQDomNode node = list.item(i);
|
|
|
|
TQDomElement elem = node.toElement();
|
|
|
|
if( !elem.isNull() )
|
|
|
|
{
|
|
|
|
// Enter the list of <attribute>:
|
|
|
|
TQDomNodeList attributeList = elem.elementsByTagName( "attribute" );
|
|
|
|
uint attributeListLength = attributeList.count();
|
|
|
|
for( uint l = 0; l < attributeListLength; l++ )
|
|
|
|
{
|
|
|
|
TQDomNode attributeNode = attributeList.item(l);
|
|
|
|
TQDomElement attributeElem = attributeNode.toElement();
|
|
|
|
if( ! attributeElem.isNull() )
|
|
|
|
{
|
|
|
|
TQString value = attributeElem.attribute( "value" );
|
|
|
|
attributevaluesTmp.insert( attributeElem.attribute("name"), TQStringList::split(TQRegExp(" "), value) );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
m_attributevaluesList.insert( elem.attribute("name"), attributevaluesTmp );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Check which attributes values are allowed for an attribute in an element
|
|
|
|
* (the element is necessary because e.g. "href" inside <a> could be different
|
|
|
|
* to an "href" inside <link>):
|
|
|
|
*/
|
|
|
|
TQStringList PseudoDTD::attributeValues( TQString element, TQString attribute )
|
|
|
|
{
|
|
|
|
// Direct access would be faster than iteration of course but not always correct,
|
|
|
|
// because we need to be case-insensitive.
|
|
|
|
if( m_sgmlSupport ) {
|
|
|
|
// first find the matching element, ignoring case:
|
|
|
|
TQMap< TQString,TQMap<TQString,TQStringList> >::Iterator it;
|
|
|
|
for( it = m_attributevaluesList.begin(); it != m_attributevaluesList.end(); ++it )
|
|
|
|
{
|
|
|
|
if( it.key().lower() == element.lower() )
|
|
|
|
{
|
|
|
|
TQMap<TQString,TQStringList> attrVals = it.data();
|
|
|
|
TQMap<TQString,TQStringList>::Iterator itV;
|
|
|
|
// then find the matching attribute for that element, ignoring case:
|
|
|
|
for( itV = attrVals.begin(); itV != attrVals.end(); ++itV )
|
|
|
|
{
|
|
|
|
if( itV.key().lower() == attribute.lower() )
|
|
|
|
return( itV.data() );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if( m_attributevaluesList.contains(element) )
|
|
|
|
{
|
|
|
|
TQMap<TQString,TQStringList> attrVals = m_attributevaluesList[element];
|
|
|
|
if( attrVals.contains(attribute) )
|
|
|
|
return attrVals[attribute];
|
|
|
|
}
|
|
|
|
|
|
|
|
// no predefined values available:
|
|
|
|
return TQStringList();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Iterate through the XML to get a mapping of all entity names and their expanded
|
|
|
|
* version, e.g. nbsp =>  . Parameter entities are ignored.
|
|
|
|
*/
|
|
|
|
bool PseudoDTD::parseEntities( TQDomDocument *doc, TQProgressDialog *progress )
|
|
|
|
{
|
|
|
|
m_entityList.clear();
|
|
|
|
TQDomNodeList list = doc->elementsByTagName( "entity" );
|
|
|
|
uint listLength = list.count();
|
|
|
|
|
|
|
|
for( uint i = 0; i < listLength; i++ )
|
|
|
|
{
|
|
|
|
if( progress->wasCancelled() )
|
|
|
|
return false;
|
|
|
|
|
|
|
|
progress->setProgress( progress->progress()+1 );
|
|
|
|
//FIXME!!
|
|
|
|
//tqApp->processEvents();
|
|
|
|
TQDomNode node = list.item(i);
|
|
|
|
TQDomElement elem = node.toElement();
|
|
|
|
if( !elem.isNull()
|
|
|
|
&& elem.attribute( "type" ) != "param" )
|
|
|
|
{ // TODO: what's cdata <-> gen ?
|
|
|
|
TQDomNodeList expandedList = elem.elementsByTagName( "text-expanded" );
|
|
|
|
TQDomNode expandedNode = expandedList.item(0);
|
|
|
|
TQDomElement expandedElem = expandedNode.toElement();
|
|
|
|
if( ! expandedElem.isNull() )
|
|
|
|
{
|
|
|
|
TQString exp = expandedElem.text();
|
|
|
|
// TODO: support more than one &#...; in the expanded text
|
|
|
|
/* TODO include do this when the unicode font problem is solved:
|
|
|
|
if( exp.contains(TQRegExp("^&#x[a-zA-Z0-9]+;$")) ) {
|
|
|
|
// hexadecimal numbers, e.g. "ȶ"
|
|
|
|
uint end = exp.find( ";" );
|
|
|
|
exp = exp.mid( 3, end-3 );
|
|
|
|
exp = TQChar();
|
|
|
|
} else if( exp.contains(TQRegExp("^&#[0-9]+;$")) ) {
|
|
|
|
// decimal numbers, e.g. "ì"
|
|
|
|
uint end = exp.find( ";" );
|
|
|
|
exp = exp.mid( 2, end-2 );
|
|
|
|
exp = TQChar( exp.toInt() );
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
m_entityList.insert( elem.attribute("name"), exp );
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
m_entityList.insert( elem.attribute("name"), TQString() );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Get a list of all ( non-parameter ) entities that start with a certain string.
|
|
|
|
*/
|
|
|
|
TQStringList PseudoDTD::entities( TQString start )
|
|
|
|
{
|
|
|
|
TQStringList entities;
|
|
|
|
TQMap<TQString,TQString>::Iterator it;
|
|
|
|
for( it = m_entityList.begin(); it != m_entityList.end(); ++it ) {
|
|
|
|
if( (*it).startsWith(start) )
|
|
|
|
{
|
|
|
|
TQString str = it.key();
|
|
|
|
/* TODO: show entities as unicode character
|
|
|
|
if( !it.data().isEmpty() ) {
|
|
|
|
//str += " -- " + it.data();
|
|
|
|
TQRegExp re( "&#(\\d+);" );
|
|
|
|
if( re.search(it.data()) != -1 ) {
|
|
|
|
uint ch = re.cap( 1).toUInt();
|
|
|
|
str += " -- " + TQChar( ch).decomposition();
|
|
|
|
}
|
|
|
|
//kdDebug() << "#" << it.data() << endl;
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
entities.append( str );
|
|
|
|
// TODO: later use a table view
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return entities;
|
|
|
|
}
|
|
|
|
|
|
|
|
// kate: space-indent on; indent-width 2; replace-tabs on; mixed-indent off;
|