You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdeaddons/kate/xmltools/pseudo_dtd.cpp

467 lines
15 KiB

/***************************************************************************
pseudoDtd.cpp
copyright : (C) 2001-2002 by Daniel Naber
email : daniel.naber@t-online.de
***************************************************************************/
/***************************************************************************
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or ( at your option ) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#include "pseudo_dtd.h"
#include <assert.h>
#include <tqdom.h>
#include <tqregexp.h>
#include <klocale.h>
#include <kmessagebox.h>
/**
 * Construct a PseudoDTD with "SGML support" switched on.
 */
PseudoDTD::PseudoDTD()
  // "SGML support" only means case-insensitivity, because HTML is
  // case-insensitive up to version 4.
  // TODO: make this a run-time option ( maybe automatically set )
  : m_sgmlSupport( true )
{
}
/** Destructor; the body is intentionally empty. */
PseudoDTD::~PseudoDTD() {}
/**
 * Load a meta DTD and run the four parse phases ( entities, elements,
 * attributes, attribute values ) on it while showing a progress dialog.
 *
 * @param metaDtdUrl  only used to name the file in the error messages
 * @param metaDtd     the XML text handed to TQDomDocument::setContent()
 *
 * An error box is shown and the function returns without parsing when the
 * text cannot be loaded as XML or when its doctype name is not "dtd". The
 * function also stops as soon as one of the parse phases returns false.
 */
void PseudoDTD::analyzeDTD( TQString &metaDtdUrl, TQString &metaDtd )
{
  TQDomDocument doc( "dtdIn_xml" );

  if ( ! doc.setContent( metaDtd) )
  {
    KMessageBox::error(0, i18n("The file '%1' could not be parsed. "
                               "Please check that the file is well-formed XML.").arg( metaDtdUrl ),
                       i18n( "XML Plugin Error") );
    return;
  }

  if ( doc.doctype().name() != "dtd" )
  {
    KMessageBox::error(0, i18n("The file '%1' is not in the expected format. "
                               "Please check that the file is of this type:\n"
                               "-//Norman Walsh//DTD DTDParse V2.0//EN\n"
                               "You can produce such files with dtdparse. "
                               "See the Kate Plugin documentation for more information.").arg( metaDtdUrl ),
                       i18n("XML Plugin Error") );
    return;
  }

  // One progress step per node that the parse phases will visit.
  const uint entityCount  = doc.elementsByTagName( "entity" ).count();
  const uint elementCount = doc.elementsByTagName( "element" ).count();
  // counted twice, as the attlists are iterated twice ( TODO: optimize that? ):
  const uint attlistCount = doc.elementsByTagName( "attlist" ).count();
  const uint totalSteps   = entityCount + elementCount + attlistCount * 2;

  TQProgressDialog progress( i18n("Analyzing meta DTD..."), i18n("Cancel"), totalSteps,
                             0, "progress", TRUE );
  progress.setMinimumDuration( 400 );
  progress.setProgress(0);

  // Get information from meta DTD and put it in TQt data structures for fast
  // access. The chain short-circuits, so a phase that returns false prevents
  // the following phases from running.
  const bool allPhasesDone = parseEntities( &doc, &progress )
                          && parseElements( &doc, &progress )
                          && parseAttributes( &doc, &progress )
                          && parseAttributeValues( &doc, &progress );
  if( ! allPhasesDone )
    return;

  progress.setProgress( totalSteps ); // just to make sure the dialog disappears
}
// ========================================================================
// DOM stuff:
/**
 * Iterate through the XML to get a mapping which sub-elements are allowed for
 * all elements. The result is stored in m_elementsList ( element name =>
 * list of sub-element names ), which is cleared first.
 *
 * @param doc       the parsed meta DTD
 * @param progress  progress dialog; advanced by one step per <element> node
 * @return false if the progress dialog reports that it was cancelled
 *         ( m_elementsList then only holds the elements handled so far ),
 *         true otherwise
 */
bool PseudoDTD::parseElements( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_elementsList.clear();

  // We only display a list, i.e. we pretend that the content model is just
  // a set, so we use a map ( the bool value is never read ). This is necessary
  // e.g. for xhtml 1.0's head element, which would otherwise display some
  // elements twice.
  TQMap<TQString,bool> subelementList;

  TQDomNodeList list = doc->elementsByTagName( "element" );
  const uint listLength = list.count(); // speedup (really! )
  for( uint i = 0; i < listLength; i++ )
  {
    if( progress->wasCancelled() )
      return false;
    progress->setProgress( progress->progress()+1 );
    // FIXME!:
    //tqApp->processEvents();

    subelementList.clear();
    TQDomElement elem = list.item( i ).toElement();
    if( elem.isNull() )
      continue;

    // Enter the expanded content model, which may also include stuff not allowed.
    // We do not care if it's a <sequence-group> or whatever.
    TQDomElement contentModelElem =
      elem.elementsByTagName( "content-model-expanded" ).item(0).toElement();
    if( ! contentModelElem.isNull() )
    {
      // collect the names of all sub elements:
      TQDomNodeList subList = contentModelElem.elementsByTagName( "element-name" );
      const uint subListLength = subList.count();
      for( uint l = 0; l < subListLength; l++ )
      {
        TQDomElement subElem = subList.item(l).toElement();
        if( !subElem.isNull() )
          subelementList[subElem.attribute( "name" )] = true;
      }
      // anders: check if this is an EMPTY element, and put "__EMPTY" in the
      // sub list, so that we can insert tags in empty form if required.
      if ( elem.elementsByTagName( "empty" ).count() )
        subelementList["__EMPTY"] = true;
    }

    // Now remove the elements not allowed (e.g. <a> is explicitly not allowed in <a>
    // in the HTML 4.01 Strict DTD). Sometimes there are no exclusions
    // ( e.g. in XML DTDs there are never exclusions ).
    TQDomNodeList exclusionsList = elem.elementsByTagName( "exclusions" );
    if( exclusionsList.length() > 0 )
    {
      TQDomElement exclusionsElem = exclusionsList.item(0).toElement();
      if( ! exclusionsElem.isNull() )
      {
        TQDomNodeList excludedList = exclusionsElem.elementsByTagName( "element-name" );
        const uint excludedListLength = excludedList.count();
        for( uint l = 0; l < excludedListLength; l++ )
        {
          TQDomElement excludedElem = excludedList.item(l).toElement();
          // remove() by key does nothing when the name is not in the map
          if( !excludedElem.isNull() )
            subelementList.remove( excludedElem.attribute( "name" ) );
        }
      }
    }

    // turn the map into a list:
    TQStringList subelementListTmp;
    TQMap<TQString,bool>::Iterator it;
    for( it = subelementList.begin(); it != subelementList.end(); ++it )
      subelementListTmp.append( it.key() );
    m_elementsList.insert( elem.attribute( "name" ), subelementListTmp );
  } // end iteration over all <element> nodes

  return true;
}
/**
 * Look up which elements may appear inside @p parentElement. The returned
 * list says nothing about the order of the elements or about how many
 * occurrences of each are allowed.
 *
 * With SGML support enabled the element name is compared case-insensitively,
 * otherwise it has to match exactly.
 *
 * @return the stored sub-element list, or an empty list if the element is
 *         not known
 */
TQStringList PseudoDTD::allowedElements( TQString parentElement )
{
  if( ! m_sgmlSupport )
  {
    // exact match
    TQMap<TQString,TQStringList>::Iterator exact = m_elementsList.find( parentElement );
    if( exact != m_elementsList.end() )
      return exact.data();
    return TQStringList();
  }

  // case-insensitive match: walk all entries and compare lower-cased names
  const TQString wanted = parentElement.lower();
  TQMap<TQString,TQStringList>::Iterator entry = m_elementsList.begin();
  while( entry != m_elementsList.end() )
  {
    if( entry.key().lower() == wanted )
      return entry.data();
    ++entry;
  }
  return TQStringList();
}
/**
 * Iterate through the XML to get a mapping which attributes are allowed inside
 * all elements. For every <attlist> node the names of its <attribute>
 * children are sorted into required and optional attributes and stored in
 * m_attributesList under the attlist's "name", which is cleared first.
 *
 * @param doc       the parsed meta DTD
 * @param progress  progress dialog; advanced by one step per <attlist> node
 * @return false if the progress dialog reports that it was cancelled,
 *         true otherwise
 */
bool PseudoDTD::parseAttributes( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_attributesList.clear();

  TQDomNodeList list = doc->elementsByTagName( "attlist" );
  const uint listLength = list.count();
  for( uint i = 0; i < listLength; i++ )
  {
    if( progress->wasCancelled() )
      return false;
    progress->setProgress( progress->progress()+1 );
    // FIXME!!
    //tqApp->processEvents();

    TQDomElement elem = list.item(i).toElement();
    if( elem.isNull() )
      continue;

    ElementAttributes attrs;
    TQDomNodeList attributeList = elem.elementsByTagName( "attribute" );
    const uint attributeListLength = attributeList.count();
    for( uint l = 0; l < attributeListLength; l++ )
    {
      TQDomElement attributeElem = attributeList.item(l).toElement();
      if( attributeElem.isNull() )
        continue;

      const TQString attributeName = attributeElem.attribute("name");
      // The DTD keyword is "#REQUIRED". The previous comparison string was
      // "#RETQUIRED", which never matched, so every attribute was treated
      // as optional.
      if ( attributeElem.attribute("type") == "#REQUIRED" )
        attrs.requiredAttributes.append( attributeName );
      else
        attrs.optionalAttributes.append( attributeName );
    }
    m_attributesList.insert( elem.attribute("name"), attrs );
  }

  return true;
}
/**
 * Look up all attributes known for @p element: the optional attributes
 * followed by the required ones.
 *
 * With SGML support enabled the element name is compared case-insensitively,
 * otherwise it has to match exactly.
 *
 * @return the combined attribute list, or an empty list if the element is
 *         not known
 */
TQStringList PseudoDTD::allowedAttributes( TQString element )
{
  if( ! m_sgmlSupport )
  {
    // exact match
    TQMap<TQString,ElementAttributes>::Iterator exact = m_attributesList.find( element );
    if( exact != m_attributesList.end() )
      return exact.data().optionalAttributes + exact.data().requiredAttributes;
    return TQStringList();
  }

  // case-insensitive match: walk all entries and compare lower-cased names
  const TQString wanted = element.lower();
  TQMap<TQString,ElementAttributes>::Iterator entry = m_attributesList.begin();
  while( entry != m_attributesList.end() )
  {
    if( entry.key().lower() == wanted )
      return entry.data().optionalAttributes + entry.data().requiredAttributes;
    ++entry;
  }
  return TQStringList();
}
/**
 * Look up the attributes that were recorded as required for @p element.
 *
 * With SGML support enabled the element name is compared case-insensitively,
 * otherwise it has to match exactly.
 *
 * @return the list of required attributes, or an empty list if the element
 *         is not known
 */
TQStringList PseudoDTD::requiredAttributes( const TQString &element ) const
{
  if ( ! m_sgmlSupport )
  {
    // exact match
    TQMap<TQString,ElementAttributes>::ConstIterator exact = m_attributesList.find( element );
    if( exact != m_attributesList.end() )
      return exact.data().requiredAttributes;
    return TQStringList();
  }

  // case-insensitive match: walk all entries and compare lower-cased names
  const TQString wanted = element.lower();
  TQMap<TQString,ElementAttributes>::ConstIterator entry = m_attributesList.begin();
  while( entry != m_attributesList.end() )
  {
    if( entry.key().lower() == wanted )
      return entry.data().requiredAttributes;
    ++entry;
  }
  return TQStringList();
}
/**
 * Iterate through the XML to get a mapping which attribute values are allowed
 * for all attributes inside all elements. For every <attlist> node, the
 * "value" of each <attribute> child is split at blanks; the resulting lists
 * are stored per attribute name in m_attributevaluesList under the attlist's
 * "name". m_attributevaluesList is cleared first.
 *
 * @param doc       the parsed meta DTD
 * @param progress  progress dialog; advanced by one step per <attlist> node
 * @return false if the progress dialog reports that it was cancelled,
 *         true otherwise
 */
bool PseudoDTD::parseAttributeValues( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_attributevaluesList.clear(); // 1 element : n possible attributes

  TQDomNodeList attlists = doc->elementsByTagName( "attlist" );
  const uint attlistCount = attlists.count();
  for( uint idx = 0; idx < attlistCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;
    progress->setProgress( progress->progress()+1 );
    // FIXME!
    //tqApp->processEvents();

    TQDomElement attlistElem = attlists.item(idx).toElement();
    if( attlistElem.isNull() )
      continue;

    // 1 attribute : n possible values ( fresh map for every attlist )
    TQMap<TQString,TQStringList> valuesByAttribute;

    // Enter the list of <attribute>:
    TQDomNodeList attributes = attlistElem.elementsByTagName( "attribute" );
    const uint attributeCount = attributes.count();
    for( uint a = 0; a < attributeCount; ++a )
    {
      TQDomElement attributeElem = attributes.item(a).toElement();
      if( attributeElem.isNull() )
        continue;

      const TQString rawValue = attributeElem.attribute( "value" );
      valuesByAttribute.insert( attributeElem.attribute("name"),
                                TQStringList::split(TQRegExp(" "), rawValue) );
    }

    m_attributevaluesList.insert( attlistElem.attribute("name"), valuesByAttribute );
  }

  return true;
}
/**
 * Check which attribute values are allowed for an attribute in an element
 * (the element is necessary because e.g. "href" inside <a> could be different
 * to an "href" inside <link>).
 *
 * With SGML support enabled both the element and the attribute name are
 * compared case-insensitively, otherwise both have to match exactly.
 *
 * @param element    name of the element
 * @param attribute  name of the attribute inside that element
 * @return the stored list of values, or an empty list if no predefined
 *         values are available
 */
TQStringList PseudoDTD::attributeValues( TQString element, TQString attribute )
{
  typedef TQMap<TQString,TQStringList> ValueMap;

  // Direct access would be faster than iteration of course but not always correct,
  // because we need to be case-insensitive.
  if( m_sgmlSupport )
  {
    // lower-case the search keys once instead of once per comparison
    const TQString wantedElement = element.lower();
    const TQString wantedAttribute = attribute.lower();

    // first find the matching element, ignoring case:
    TQMap<TQString,ValueMap>::Iterator it;
    for( it = m_attributevaluesList.begin(); it != m_attributevaluesList.end(); ++it )
    {
      if( it.key().lower() != wantedElement )
        continue;

      // Read through a const reference: copying the implicitly shared map
      // and then calling the non-const begin() would force a deep copy.
      const ValueMap &attrVals = it.data();
      // then find the matching attribute for that element, ignoring case:
      ValueMap::ConstIterator itV;
      for( itV = attrVals.begin(); itV != attrVals.end(); ++itV )
      {
        if( itV.key().lower() == wantedAttribute )
          return itV.data();
      }
      // no match in this entry: keep looking, another key may differ only in case
    }
  }
  else
  {
    TQMap<TQString,ValueMap>::Iterator elementIt = m_attributevaluesList.find( element );
    if( elementIt != m_attributevaluesList.end() )
    {
      const ValueMap &attrVals = elementIt.data();
      ValueMap::ConstIterator valueIt = attrVals.find( attribute );
      if( valueIt != attrVals.end() )
        return valueIt.data();
    }
  }

  // no predefined values available:
  return TQStringList();
}
/**
 * Iterate through the XML to get a mapping of all entity names and their expanded
 * version, e.g. nbsp => &#160;. Parameter entities ( type "param" ) are ignored.
 * The result is stored in m_entityList, which is cleared first. An entity
 * without a <text-expanded> child is stored with a null string.
 *
 * @param doc       the parsed meta DTD
 * @param progress  progress dialog; advanced by one step per <entity> node
 * @return false if the progress dialog reports that it was cancelled,
 *         true otherwise
 */
bool PseudoDTD::parseEntities( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_entityList.clear();

  TQDomNodeList entityNodes = doc->elementsByTagName( "entity" );
  const uint entityCount = entityNodes.count();
  for( uint idx = 0; idx < entityCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;
    progress->setProgress( progress->progress()+1 );
    //FIXME!!
    //tqApp->processEvents();

    TQDomElement entityElem = entityNodes.item(idx).toElement();
    // skip non-elements and parameter entities
    // TODO: what's cdata <-> gen ?
    if( entityElem.isNull() || entityElem.attribute( "type" ) == "param" )
      continue;

    TQString expansion; // stays null when there is no <text-expanded>
    TQDomElement expandedElem =
      entityElem.elementsByTagName( "text-expanded" ).item(0).toElement();
    if( ! expandedElem.isNull() )
    {
      expansion = expandedElem.text();
      // TODO: support more than one &#...; in the expanded text
      /* TODO include do this when the unicode font problem is solved:
      if( exp.contains(TQRegExp("^&#x[a-zA-Z0-9]+;$")) ) {
        // hexadecimal numbers, e.g. "&#x236;"
        uint end = exp.find( ";" );
        exp = exp.mid( 3, end-3 );
        exp = TQChar();
      } else if( exp.contains(TQRegExp("^&#[0-9]+;$")) ) {
        // decimal numbers, e.g. "&#236;"
        uint end = exp.find( ";" );
        exp = exp.mid( 2, end-2 );
        exp = TQChar( exp.toInt() );
      }
      */
    }
    m_entityList.insert( entityElem.attribute("name"), expansion );
  }

  return true;
}
/**
 * Get a list of all ( non-parameter ) entities whose name starts with a
 * certain string.
 *
 * @param start  prefix the entity name has to begin with
 * @return the names of all matching entries of m_entityList
 */
TQStringList PseudoDTD::entities( TQString start )
{
  TQStringList entities;
  TQMap<TQString,TQString>::Iterator it;
  for( it = m_entityList.begin(); it != m_entityList.end(); ++it )
  {
    // Compare against the key ( the entity name ). Dereferencing the map
    // iterator, as the code did before, yields the mapped value, i.e. the
    // expansion text, so the prefix was tested against the wrong string.
    if( ! it.key().startsWith(start) )
      continue;

    TQString str = it.key();
    /* TODO: show entities as unicode character
    if( !it.data().isEmpty() ) {
      //str += " -- " + it.data();
      TQRegExp re( "&#(\\d+);" );
      if( re.search(it.data()) != -1 ) {
        uint ch = re.cap( 1).toUInt();
        str += " -- " + TQChar( ch).decomposition();
      }
      //kdDebug() << "#" << it.data() << endl;
    }
    */
    entities.append( str );
    // TODO: later use a table view
  }
  return entities;
}
// kate: space-indent on; indent-width 2; replace-tabs on; mixed-indent off;