akregator-librss: use namespaces

Check each element's namespace URI rather than assuming a fixed prefix;
relying on the prefix can result in incorrect metadata being displayed.

Signed-off-by: mio <stigma@disroot.org>
pull/152/head
mio 1 year ago
parent b69050d6e8
commit 81d428dedb

@ -92,13 +92,16 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new
d->link = elemText;
}
// prefer content/content:encoded over summary/description for feeds that provide it
if (format == AtomFeed)
{
d->description = extractNode(node, TQString::fromLatin1("content"), false);
}
else
{
d->description = extractElementTextNS(node, ContentNamespace, TQString::fromLatin1("encoded"), false);
}
// prefer content/content:encoded over summary/description for feeds that provide it
TQString tagName=(format==AtomFeed)? TQString::fromLatin1("content"): TQString::fromLatin1("content:encoded");
if (!(elemText = extractNode(node, tagName, false)).isNull())
d->description = elemText;
if (d->description.isEmpty())
{
if (!(elemText = extractNode(node, TQString::fromLatin1("body"), false)).isNull())
@ -130,7 +133,7 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new
time = KRFCDate::parseDate(elemText);
}
if (!(elemText = extractNode(node, TQString::fromLatin1("dc:date"))).isNull())
if (!(elemText = extractElementTextNS(node, DublinCoreNamespace, TQString::fromLatin1("date"))).isNull())
{
time = parseISO8601Date(elemText);
}
@ -139,27 +142,22 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new
if (time != 0)
d->pubDate.setTime_t(time);
if (!(elemText = extractNode(node, TQString::fromLatin1("wfw:comment"))).isNull()) {
d->commentsLink = elemText;
}
if (!(elemText = extractNode(node, TQString::fromLatin1("slash:comments"))).isNull()) {
d->numComments = elemText.toInt();
}
d->commentsLink = extractElementTextNS(node, CommentAPINamespace, TQString::fromLatin1("comment"));
d->numComments = extractElementTextNS(node, SlashNamespace, TQString::fromLatin1("comments")).toInt();
TQDomElement element = TQDomNode(node).toElement();
// in RSS 1.0, we use <item about> attribute as ID
// FIXME: pass format version instead of checking for attribute
if (!element.isNull() && element.hasAttribute(TQString::fromLatin1("rdf:about")))
if (!element.isNull() && element.hasAttributeNS(RDFNamespace, TQString::fromLatin1("about")))
{
d->guid = element.attribute(TQString::fromLatin1("rdf:about")); // HACK: using ns properly did not work
d->guid = element.attributeNS(RDFNamespace, TQString::fromLatin1("about"), TQString::null);
d->guidIsPermaLink = false;
}
else
{
tagName=(format==AtomFeed)? TQString::fromLatin1("id"): TQString::fromLatin1("guid");
TQString tagName=(format==AtomFeed)? TQString::fromLatin1("id"): TQString::fromLatin1("guid");
TQDomNode n = node.namedItem(tagName);
if (!n.isNull())
{

@ -224,7 +224,7 @@ Document::Document(const TQDomDocument &doc) : d(new Private)
d->copyright = elemText;
if (d->format == AtomFeed)
elemText = rootNode.toElement().attribute(TQString::fromLatin1("xml:lang"), TQString());
elemText = rootNode.toElement().attributeNS(XMLNamespace, "lang", TQString::null);
else
elemText = extractNode(channelNode, TQString::fromLatin1("language"));
@ -441,7 +441,7 @@ Document::Document(const TQDomDocument &doc) : d(new Private)
d->pubDate.setTime_t(_time);
}
if (!(elemText = extractNode(channelNode, TQString::fromLatin1("dc:date"))).isNull()) {
if (!(elemText = extractElementTextNS(channelNode, DublinCoreNamespace, "date")).isNull()) {
time_t _time = parseISO8601Date(elemText);
/* \bug This isn't really the right way since it will set the date to
* Jan 1 1970, 1:00:00 if the passed date was invalid; this means that

@ -18,6 +18,21 @@ class TQValueList;
namespace RSS
{
/// Namespace URI of Atom 1.0 elements.
constexpr char const *AtomNamespace = "http://www.w3.org/2005/Atom";

/// Namespace URI of the CommentAPI module.
constexpr char const *CommentAPINamespace = "http://wellformedweb.org/CommentAPI/";

/// Namespace URI of the Content module.
constexpr char const *ContentNamespace = "http://purl.org/rss/1.0/modules/content/";

/// Namespace URI of the Dublin Core element set.
constexpr char const *DublinCoreNamespace = "http://purl.org/dc/elements/1.1/";

/// Namespace URI of the RDF Concepts Vocabulary (RDF).
constexpr char const *RDFNamespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

/// Namespace URI of the Slash module.
constexpr char const *SlashNamespace = "http://purl.org/rss/1.0/modules/slash/";

/// Namespace URI reserved for XML itself.
constexpr char const *XMLNamespace = "http://www.w3.org/XML/1998/namespace";
/**
* Versions currently supported by this library. This enumeration is
* subject to be extended in the future and used by Document::version() to

@ -351,7 +351,7 @@ void Loader::slotRetrieverDone(const TQByteArray &data, bool success)
TQByteArray tmpData;
tmpData.setRawData(charData, len);
if (doc.setContent(tmpData))
if (doc.setContent(tmpData, /* namespaceProcessing */ true))
{
rssDoc = Document(doc);
if (!rssDoc.isValid())

@ -117,6 +117,59 @@ static TQString extractAtomContent(const TQDomElement& e)
return TQString();
}
/**
 * Looks up a direct child element of @p parent by namespace URI and local
 * name.
 *
 * Only the immediate children of @p parent are inspected; the first child
 * that is an element and whose namespace URI and local name both match is
 * returned.
 *
 * @param parent     Node whose children are searched.
 * @param nameSpace  Namespace URI the element must have.
 * @param localName  Local (unprefixed) name the element must have.
 * @return The first matching child element, or a null element when
 *         @p parent is null or no child matches.
 */
TQDomElement extractElementNS(const TQDomNode &parent, const TQString &nameSpace, const TQString &localName)
{
	if (parent.isNull())
	{
		return TQDomElement();
	}

	TQDomNodeList candidates = parent.childNodes();
	for (size_t idx = 0; idx < candidates.count(); ++idx)
	{
		TQDomNode child = candidates.item(idx);
		if (!child.isElement())
		{
			continue;
		}
		if (child.namespaceURI() == nameSpace && child.localName() == localName)
		{
			// First match wins; later siblings with the same name are ignored.
			return child.toElement();
		}
	}

	return TQDomElement();
}
/**
 * Extracts the textual content of a direct child element of @p parent,
 * identified by namespace URI and local name.
 *
 * An element whose local name is "content" is handed to
 * extractAtomContent(). For any other element the whitespace-stripped text
 * is post-processed the same way extractNode() does it:
 *  - if the text contains no '<' and @p isInlined is false, newlines are
 *    turned into "<br />";
 *  - unless the text contains a <pre> tag, runs of whitespace are collapsed.
 *
 * @param parent        Node whose children are searched.
 * @param namespaceURI  Namespace URI of the wanted element.
 * @param localName     Local (unprefixed) name of the wanted element.
 * @param isInlined     When false, plain-text newlines are converted to
 *                      "<br />".
 * @return The extracted text, or TQString::null when the element does not
 *         exist or its text is empty.
 */
TQString extractElementTextNS(const TQDomNode &parent, const TQString &namespaceURI, const TQString &localName, bool isInlined)
{
	TQDomElement element = extractElementNS(parent, namespaceURI, localName);
	if (element.isNull())
	{
		return TQString::null;
	}

	TQString result;
	if (localName == "content")
	{
		// Atom content.
		// NOTE(review): only the local name is checked here, not the
		// namespace URI - confirm no caller passes a non-Atom "content".
		result = extractAtomContent(element);
	}
	else
	{
		result = element.text().stripWhiteSpace();

		// Check for HTML; not necessary for atom:content.
		// Taken from extractNode.
		const bool hasPre = result.contains("<pre>", false) || result.contains("<pre ", false);
		const bool hasHtml = hasPre || result.contains("<");

		if (!isInlined && !hasHtml)
		{
			// replace() modifies the string in place.
			result.replace(TQChar('\n'), "<br />");
		}
		if (!hasPre)
		{
			// Whitespace inside <pre> is significant, so only collapse it
			// when no <pre> tag is present.
			result = result.simplifyWhiteSpace();
		}
	}

	return result.isEmpty() ? TQString::null : result;
}
TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined)
{
TQDomNode node = parent.namedItem(elemName);
@ -208,25 +261,25 @@ TQString parseItemAuthor(const TQDomElement& element, Format format, Version ver
TQString name;
TQString email;
TQDomElement dcCreator = element.namedItem("dc:creator").toElement();
TQDomElement dcCreator = extractElementNS(element, DublinCoreNamespace, "creator");
if (!dcCreator.isNull())
authorFromString(dcCreator.text(), name, email);
else if (format == AtomFeed)
{
TQDomElement atomAuthor = element.namedItem("author").toElement();
if (atomAuthor.isNull())
atomAuthor = element.namedItem("atom:author").toElement();
atomAuthor = extractElementNS(element, AtomNamespace, "author");
if (!atomAuthor.isNull())
{
TQDomElement atomName = atomAuthor.namedItem("name").toElement();
if (atomName.isNull())
atomName = atomAuthor.namedItem("atom:name").toElement();
atomName = extractElementNS(atomAuthor, AtomNamespace, "name");
name = atomName.text().stripWhiteSpace();
TQDomElement atomEmail = atomAuthor.namedItem("email").toElement();
if (atomEmail.isNull())
atomEmail = atomAuthor.namedItem("atom:email").toElement();
atomEmail = extractElementNS(atomAuthor, AtomNamespace, "email");
email = atomEmail.text().stripWhiteSpace();
}
}

@ -29,6 +29,8 @@ namespace RSS
unsigned int count;
};
/**
 * Returns the first direct child element of @p parent whose namespace URI
 * equals @p namespaceURI and whose local name equals @p localName, or a
 * null element if @p parent is null or no child matches.
 */
TQDomElement extractElementNS(const TQDomNode& parent, const TQString& namespaceURI, const TQString& localName);
/**
 * Returns the text of the direct child element of @p parent identified by
 * @p namespaceURI and @p localName, or TQString::null if there is no such
 * element or its text is empty. When @p isInlined is false and the text
 * contains no '<', newlines are replaced with "<br />".
 */
TQString extractElementTextNS(const TQDomNode& parent, const TQString& namespaceURI, const TQString& localName, bool isInlined = true);
TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined=true);
TQString extractTitle(const TQDomNode &parent);
TQString childNodesAsXML(const TQDomNode& parent);

Loading…
Cancel
Save