You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
129 lines
4.5 KiB
129 lines
4.5 KiB
/***************************************************************************
    copyright            : (C) 2006 by Robby Stephenson
    email                : robby@periapsis.org
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/
|
|
|
|
#include "dcimporter.h"
|
|
#include "../collections/bookcollection.h"
|
|
#include "tellico_xml.h"
|
|
#include "../tellico_debug.h"
|
|
|
|
using Tellico::Import::DCImporter;
|
|
|
|
// Constructs the importer from a URL. The argument is passed straight
// through to XMLImporter; no additional setup is done here.
DCImporter::DCImporter(const KURL& url_)
    : XMLImporter(url_)
{
}
|
|
|
|
// Constructs the importer from a text string. The argument is passed
// straight through to XMLImporter; no additional setup is done here.
DCImporter::DCImporter(const TQString& text_)
    : XMLImporter(text_)
{
}
|
|
|
|
// Constructs the importer from a DOM document. The argument is passed
// straight through to XMLImporter; no additional setup is done here.
DCImporter::DCImporter(const TQDomDocument& dom_)
    : XMLImporter(dom_)
{
}
|
|
|
|
/**
 * Builds a book collection from the importer's DOM document.
 *
 * One entry is created for every "recordData" element found in the
 * XML::nsZing namespace. From each record the elements title, creator,
 * publisher, subject, date and description are read and stored in the entry
 * fields title, author, publisher, keyword, pub_year and comments.
 *
 * Elements are first looked up in the XML::nsDublinCore namespace. If the
 * title lookup finds nothing while the namespace mode is still undecided, the
 * lookup is retried without a namespace; whichever variant first yields a
 * title is then used for all subsequent lookups. Records for which no title
 * can be found are skipped.
 *
 * @return the newly created collection holding the imported entries
 */
Tellico::Data::CollPtr DCImporter::collection() {
  const TQString& dcNS = XML::nsDublinCore;
  const TQString& zingNS = XML::nsZing;

  Data::CollPtr coll = new Data::BookCollection(true);

  TQDomDocument dom = domDocument();

  // Matches a date or date range trailing a creator name: an optional comma,
  // whitespace, four digits, an optional dash and second four digits, and an
  // optional period. Capture 2 holds whatever text follows the dates.
  TQRegExp creatorDatesRX(TQString::fromLatin1(",?(\\s+\\d{4}-?(?:\\d{4})?\\.?)(.*)$"));
  // First run of four digits, used as the publication year.
  TQRegExp yearRX(TQString::fromLatin1("\\d{4}"));

  TQDomNodeList records = dom.elementsByTagNameNS(zingNS, TQString::fromLatin1("recordData"));
  myDebug() << "DCImporter::collection() - number of records: " << records.count() << endl;

  // Whether the record children are namespace-qualified is not known up
  // front; it is decided by the first record in which a title is found.
  enum { UnknownNS, UseNS, NoNS } nsMode = UnknownNS;

  // Looks up the children of the current record by tag name, using the
  // namespace-aware variant unless the mode has been settled as NoNS.
#define GETELEMENTS(s) ((nsMode == NoNS) \
    ? record.elementsByTagName(TQString::fromLatin1(s)) \
    : record.elementsByTagNameNS(dcNS, TQString::fromLatin1(s)))

  const TQString separator = TQString::fromLatin1("; ");

  for(uint idx = 0; idx < records.count(); ++idx) {
    Data::EntryPtr entry = new Data::Entry(coll);

    TQDomElement record = records.item(idx).toElement();

    // --- title (mandatory) ---
    TQDomNodeList nodes = GETELEMENTS("title");
    if(nodes.count() > 0) {
      if(nsMode == UnknownNS) {
        nsMode = UseNS;
      }
    } else if(nsMode == UnknownNS) {
      // nothing found with the namespace, so retry without one
      nodes = record.elementsByTagName(TQString::fromLatin1("title"));
      if(nodes.count() > 0) {
        nsMode = NoNS;
      }
    }
    if(nodes.count() == 0) {
      myDebug() << "DCImporter::collection() - no title, skipping" << endl;
      continue;
    }

    TQString title = nodes.item(0).toElement().text();
    title.replace('\n', ' ');
    entry->setField(TQString::fromLatin1("title"), title.simplifyWhiteSpace());

    // --- creators -> author ---
    TQStringList authors;
    nodes = GETELEMENTS("creator");
    for(uint k = 0; k < nodes.count(); ++k) {
      TQString name = nodes.item(k).toElement().text();
      if(creatorDatesRX.search(name) < 0) {
        // no trailing dates, take the name as it is
        authors << name;
        continue;
      }
      // anything left after the dates (like [publisher]) means this is not
      // a plain "name, dates" value, so it is dropped
      if(!creatorDatesRX.cap(2).stripWhiteSpace().isEmpty()) {
        myDebug() << "DCImporter::collection() - weird creator, skipping: " << name << endl;
        continue;
      }
      name.remove(creatorDatesRX);
      authors << name.simplifyWhiteSpace();
    }
    entry->setField(TQString::fromLatin1("author"), authors.join(separator));

    // --- publishers ---
    TQStringList pubNames;
    nodes = GETELEMENTS("publisher");
    for(uint k = 0; k < nodes.count(); ++k) {
      pubNames << nodes.item(k).toElement().text();
    }
    entry->setField(TQString::fromLatin1("publisher"), pubNames.join(separator));

    // --- subjects -> keyword ---
    TQStringList subjects;
    nodes = GETELEMENTS("subject");
    for(uint k = 0; k < nodes.count(); ++k) {
      subjects << nodes.item(k).toElement().text();
    }
    entry->setField(TQString::fromLatin1("keyword"), subjects.join(separator));

    // --- date -> pub_year: only the first date element is considered ---
    nodes = GETELEMENTS("date");
    if(nodes.count() > 0) {
      const TQString dateText = nodes.item(0).toElement().text();
      if(yearRX.search(dateText) > -1) {
        entry->setField(TQString::fromLatin1("pub_year"), yearRX.cap());
      }
    }

    // --- description -> comments: only the first description is used ---
    nodes = GETELEMENTS("description");
    if(nodes.count() > 0) {
      entry->setField(TQString::fromLatin1("comments"), nodes.item(0).toElement().text());
    }

    coll->addEntries(entry);
  }
#undef GETELEMENTS

  return coll;
}
|