|
|
|
/***************************************************************************
|
|
|
|
copyright : (C) 2004-2006 by Robby Stephenson
|
|
|
|
email : robby@periapsis.org
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
/***************************************************************************
|
|
|
|
* *
|
|
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
|
|
* it under the terms of version 2 of the GNU General Public License as *
|
|
|
|
* published by the Free Software Foundation; *
|
|
|
|
* *
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
#include "risimporter.h"
|
|
|
|
#include "../collections/bibtexcollection.h"
|
|
|
|
#include "../document.h"
|
|
|
|
#include "../entry.h"
|
|
|
|
#include "../field.h"
|
|
|
|
#include "../latin1literal.h"
|
|
|
|
#include "../progressmanager.h"
|
|
|
|
#include "../filehandler.h"
|
|
|
|
#include "../isbnvalidator.h"
|
|
|
|
#include "../tellico_debug.h"
|
|
|
|
|
|
|
|
#include <kapplication.h>
|
|
|
|
|
|
|
|
#include <tqdict.h>
|
|
|
|
#include <tqregexp.h>
|
|
|
|
#include <tqmap.h>
|
|
|
|
|
|
|
|
using Tellico::Import::RISImporter;
|
|
|
|
// RIS tag (e.g. "AU") -> collection field name (e.g. "author").
// Starts out null; allocated and filled by initTagMap() the first time it runs.
TQMap<TQString, TQString>* RISImporter::s_tagMap = 0;
|
|
|
|
// RIS reference type code (the value of a "TY" line, e.g. "JOUR") -> entry-type name.
// Starts out null; allocated and filled by initTypeMap() the first time it runs.
TQMap<TQString, TQString>* RISImporter::s_typeMap = 0;
|
|
|
|
|
|
|
|
// static
|
|
|
|
void RISImporter::initTagMap() {
|
|
|
|
if(!s_tagMap) {
|
|
|
|
s_tagMap = new TQMap<TQString, TQString>();
|
|
|
|
// BT is special and is handled separately
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("TY"), TQString::tqfromLatin1("entry-type"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("ID"), TQString::tqfromLatin1("bibtex-key"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("T1"), TQString::tqfromLatin1("title"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("TI"), TQString::tqfromLatin1("title"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("T2"), TQString::tqfromLatin1("booktitle"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("A1"), TQString::tqfromLatin1("author"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("AU"), TQString::tqfromLatin1("author"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("ED"), TQString::tqfromLatin1("editor"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("YR"), TQString::tqfromLatin1("year"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("PY"), TQString::tqfromLatin1("year"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("N1"), TQString::tqfromLatin1("note"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("AB"), TQString::tqfromLatin1("abstract")); // should be note?
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("N2"), TQString::tqfromLatin1("abstract"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("KW"), TQString::tqfromLatin1("keyword"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("JF"), TQString::tqfromLatin1("journal"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("JO"), TQString::tqfromLatin1("journal"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("JA"), TQString::tqfromLatin1("journal"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("VL"), TQString::tqfromLatin1("volume"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("IS"), TQString::tqfromLatin1("number"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("PB"), TQString::tqfromLatin1("publisher"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("SN"), TQString::tqfromLatin1("isbn"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("AD"), TQString::tqfromLatin1("address"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("CY"), TQString::tqfromLatin1("address"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("UR"), TQString::tqfromLatin1("url"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("L1"), TQString::tqfromLatin1("pdf"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("T3"), TQString::tqfromLatin1("series"));
|
|
|
|
s_tagMap->insert(TQString::tqfromLatin1("EP"), TQString::tqfromLatin1("pages"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// static
|
|
|
|
void RISImporter::initTypeMap() {
|
|
|
|
if(!s_typeMap) {
|
|
|
|
s_typeMap = new TQMap<TQString, TQString>();
|
|
|
|
// leave capitalized, except for bibtex types
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("ABST"), TQString::tqfromLatin1("Abstract"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("ADVS"), TQString::tqfromLatin1("Audiovisual material"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("ART"), TQString::tqfromLatin1("Art Work"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("BILL"), TQString::tqfromLatin1("Bill/Resolution"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("BOOK"), TQString::tqfromLatin1("book")); // bibtex
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("CASE"), TQString::tqfromLatin1("Case"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("CHAP"), TQString::tqfromLatin1("inbook")); // == "inbook" ?
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("COMP"), TQString::tqfromLatin1("Computer program"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("CONF"), TQString::tqfromLatin1("inproceedings")); // == "conference" ?
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("CTLG"), TQString::tqfromLatin1("Catalog"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("DATA"), TQString::tqfromLatin1("Data file"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("ELEC"), TQString::tqfromLatin1("Electronic Citation"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("GEN"), TQString::tqfromLatin1("Generic"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("HEAR"), TQString::tqfromLatin1("Hearing"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("ICOMM"), TQString::tqfromLatin1("Internet Communication"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("INPR"), TQString::tqfromLatin1("In Press"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("JFULL"), TQString::tqfromLatin1("Journal (full)")); // = "periodical" ?
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("JOUR"), TQString::tqfromLatin1("article")); // "Journal"
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("MAP"), TQString::tqfromLatin1("Map"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("MGZN"), TQString::tqfromLatin1("article")); // bibtex
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("MPCT"), TQString::tqfromLatin1("Motion picture"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("MUSIC"), TQString::tqfromLatin1("Music score"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("NEWS"), TQString::tqfromLatin1("Newspaper"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("PAMP"), TQString::tqfromLatin1("Pamphlet")); // = "booklet" ?
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("PAT"), TQString::tqfromLatin1("Patent"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("PCOMM"), TQString::tqfromLatin1("Personal communication"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("RPRT"), TQString::tqfromLatin1("Report")); // = "techreport" ?
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("SER"), TQString::tqfromLatin1("Serial (BookMonograph)"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("SLIDE"), TQString::tqfromLatin1("Slide"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("SOUND"), TQString::tqfromLatin1("Sound recording"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("STAT"), TQString::tqfromLatin1("Statute"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("THES"), TQString::tqfromLatin1("phdthesis")); // "mastersthesis" ?
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("UNBILL"), TQString::tqfromLatin1("Unenacted bill/resolution"));
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("UNPB"), TQString::tqfromLatin1("unpublished")); // bibtex
|
|
|
|
s_typeMap->insert(TQString::tqfromLatin1("VIDEO"), TQString::tqfromLatin1("Video recording"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates an importer for the given URLs. Nothing is read here: the
// collection pointer starts out null and is only filled in by collection().
// The shared tag and type lookup tables are built if they do not exist yet.
RISImporter::RISImporter(const KURL::List& urls_)
    : Tellico::Import::Importer(urls_)
    , m_coll(0)
    , m_cancelled(false) {
  initTypeMap();
  initTagMap();
}
|
|
|
|
|
|
|
|
bool RISImporter::canImport(int type) const {
|
|
|
|
return type == Data::Collection::Bibtex;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Imports every URL handed to the importer into a new bibtex collection.
//
// The result is cached in m_coll, so a second call returns the same
// collection without reading anything again. If the user cancels while the
// files are being read, the partial collection is dropped and a null
// pointer is returned.
Tellico::Data::CollPtr RISImporter::collection() {
  if(m_coll) {
    return m_coll;
  }

  m_coll = new Data::BibtexCollection(true);

  const TQString risKey = TQString::tqfromLatin1("ris");

  // fields that carry a "ris" property, keyed by the RIS tag they stand for
  TQDict<Data::Field> fieldsByRisTag;

  // Any field of the currently open collection that points at an RIS tag
  // through its "ris" property is mirrored into the new collection, so that
  // custom tag assignments are honoured during the import.
  Data::CollPtr docColl = Data::Document::self()->collection();
  Data::FieldVec docFields = docColl->fields();
  for(Data::FieldVec::Iterator fieldIt = docFields.begin(); fieldIt != docFields.end(); ++fieldIt) {
    const TQString risTag = fieldIt->property(risKey);
    if(!risTag.isEmpty()) {
      // reuse a field of the same name if the new collection already has
      // one, otherwise copy the field over
      Data::FieldPtr target = m_coll->fieldByName(fieldIt->name());
      if(!target) {
        target = new Data::Field(*fieldIt);
        m_coll->addField(target);
      }
      target->setProperty(risKey, risTag);
      fieldsByRisTag.insert(risTag, target);
    }
  }

  // each URL accounts for 100 progress steps
  ProgressItem& progress = ProgressManager::self()->newProgressItem(this, progressLabel(), true);
  progress.setTotalSteps(urls().count() * 100);
  connect(&progress, TQT_SIGNAL(signalCancelled(ProgressItem*)), TQT_SLOT(slotCancel()));
  ProgressItem::Done done(this);

  const KURL::List sources = urls();
  int index = 0;
  KURL::List::ConstIterator srcIt = sources.begin();
  while(srcIt != sources.end() && !m_cancelled) {
    readURL(*srcIt, index, fieldsByRisTag);
    ++srcIt;
    ++index;
  }

  // a cancelled import yields no collection at all
  if(m_cancelled) {
    m_coll = 0;
  }
  return m_coll;
}
|
|
|
|
|
|
|
|
void RISImporter::readURL(const KURL& url_, int n, const TQDict<Data::Field>& risFields_) {
|
|
|
|
TQString str = FileHandler::readTextFile(url_);
|
|
|
|
if(str.isEmpty()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ISBNValidator isbnval(this);
|
|
|
|
|
|
|
|
TQTextIStream t(&str);
|
|
|
|
|
|
|
|
const uint length = str.length();
|
|
|
|
const uint stepSize = TQMAX(s_stepSize, length/100);
|
|
|
|
const bool showProgress = options() & ImportProgress;
|
|
|
|
|
|
|
|
bool needToAddFinal = false;
|
|
|
|
|
|
|
|
TQString sp, ep;
|
|
|
|
|
|
|
|
uint j = 0;
|
|
|
|
Data::EntryPtr entry = new Data::Entry(m_coll);
|
|
|
|
// technically, the spec requires a space immediately after the hyphen
|
|
|
|
// however, at least one website (Springer) outputs RIS with no space after the final "ER -"
|
|
|
|
// so just strip the white space later
|
|
|
|
// also be gracious and allow only any amount of space before hyphen
|
|
|
|
TQRegExp rx(TQString::tqfromLatin1("^(\\w\\w)\\s+-(.*)$"));
|
|
|
|
TQString currLine, nextLine;
|
|
|
|
for(currLine = t.readLine(); !m_cancelled && !currLine.isNull(); currLine = nextLine, j += currLine.length()) {
|
|
|
|
nextLine = t.readLine();
|
|
|
|
rx.search(currLine);
|
|
|
|
TQString tag = rx.cap(1);
|
|
|
|
TQString value = rx.cap(2).stripWhiteSpace();
|
|
|
|
if(tag.isEmpty()) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// myDebug() << tag << ": " << value << endl;
|
|
|
|
// if the next line is not empty and does not match start regexp, append to value
|
|
|
|
while(!nextLine.isEmpty() && nextLine.tqfind(rx) == -1) {
|
|
|
|
value += nextLine.stripWhiteSpace();
|
|
|
|
nextLine = t.readLine();
|
|
|
|
}
|
|
|
|
|
|
|
|
// every entry ends with "ER"
|
|
|
|
if(tag == Latin1Literal("ER")) {
|
|
|
|
m_coll->addEntries(entry);
|
|
|
|
entry = new Data::Entry(m_coll);
|
|
|
|
needToAddFinal = false;
|
|
|
|
continue;
|
|
|
|
} else if(tag == Latin1Literal("TY") && s_typeMap->tqcontains(value)) {
|
|
|
|
// for entry-type, switch it to normalized type name
|
|
|
|
value = (*s_typeMap)[value];
|
|
|
|
} else if(tag == Latin1Literal("SN")) {
|
|
|
|
// test for valid isbn, sometimes the issn gets stuck here
|
|
|
|
int pos = 0;
|
|
|
|
if(isbnval.validate(value, pos) != ISBNValidator::Acceptable) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
} else if(tag == Latin1Literal("SP")) {
|
|
|
|
sp = value;
|
|
|
|
if(!ep.isEmpty()) {
|
|
|
|
value = sp + '-' + ep;
|
|
|
|
tag = TQString::tqfromLatin1("EP");
|
|
|
|
sp = TQString();
|
|
|
|
ep = TQString();
|
|
|
|
} else {
|
|
|
|
// nothing else to do
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
} else if(tag == Latin1Literal("EP")) {
|
|
|
|
ep = value;
|
|
|
|
if(!sp.isEmpty()) {
|
|
|
|
value = sp + '-' + ep;
|
|
|
|
sp = TQString();
|
|
|
|
ep = TQString();
|
|
|
|
} else {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
} else if(tag == Latin1Literal("YR") || tag == Latin1Literal("PY")) { // for now, just grab the year
|
|
|
|
value = value.section('/', 0, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// the lookup scheme is:
|
|
|
|
// 1. any field has an RIS property that matches the tag name
|
|
|
|
// 2. default field mapping tag -> field name
|
|
|
|
Data::FieldPtr f = risFields_.tqfind(tag);
|
|
|
|
if(!f) {
|
|
|
|
// special case for BT
|
|
|
|
// primary title for books, secondary for everything else
|
|
|
|
if(tag == Latin1Literal("BT")) {
|
|
|
|
if(entry->field(TQString::tqfromLatin1("entry-type")) == Latin1Literal("book")) {
|
|
|
|
f = m_coll->fieldByName(TQString::tqfromLatin1("title"));
|
|
|
|
} else {
|
|
|
|
f = m_coll->fieldByName(TQString::tqfromLatin1("booktitle"));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
f = fieldByTag(tag);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(!f) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
needToAddFinal = true;
|
|
|
|
|
|
|
|
// harmless for non-choice fields
|
|
|
|
// for entry-type, want it in lower case
|
|
|
|
f->addAllowed(value);
|
|
|
|
// if the field can have multiple values, append current values to new value
|
|
|
|
if((f->flags() & Data::Field::AllowMultiple) && !entry->field(f->name()).isEmpty()) {
|
|
|
|
value.prepend(entry->field(f->name()) + TQString::tqfromLatin1("; "));
|
|
|
|
}
|
|
|
|
entry->setField(f, value);
|
|
|
|
|
|
|
|
if(showProgress && j%stepSize == 0) {
|
|
|
|
ProgressManager::self()->setProgress(this, n*100 + 100*j/length);
|
|
|
|
kapp->processEvents();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if(needToAddFinal) {
|
|
|
|
m_coll->addEntries(entry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the collection field that an RIS tag maps to by default.
//
//   tag_  two-letter RIS tag, e.g. "AU"
//
// If the tag is in the default table and the collection has the matching
// field, that field is tagged with a "ris" property and returned. Otherwise
// the only tag that gets a field created on demand is "L1", which becomes a
// URL field named "pdf". For every other tag a null pointer is returned.
Tellico::Data::FieldPtr RISImporter::fieldByTag(const TQString& tag_) {
  // Test for membership first: operator[] on the non-const map would insert
  // an empty entry for every unknown tag and grow the shared table.
  if(s_tagMap->tqcontains(tag_)) {
    const TQString& fieldName = (*s_tagMap)[tag_];
    if(!fieldName.isEmpty()) {
      Data::FieldPtr known = m_coll->fieldByName(fieldName);
      if(known) {
        known->setProperty(TQString::tqfromLatin1("ris"), tag_);
        return known;
      }
    }
  }

  // add non-default fields if not already there
  Data::FieldPtr f = 0;
  if(tag_ == Latin1Literal("L1")) {
    f = new Data::Field(TQString::tqfromLatin1("pdf"), i18n("PDF"), Data::Field::URL);
    f->setProperty(TQString::tqfromLatin1("ris"), TQString::tqfromLatin1("L1"));
    f->setCategory(i18n("Miscellaneous"));
    // only register a field that was actually created; a null pointer is
    // never handed to the collection
    m_coll->addField(f);
  }
  return f;
}
|
|
|
|
|
|
|
|
// Slot connected to the progress item's signalCancelled() in collection().
// It only raises the m_cancelled flag; the reading loops test that flag and
// stop on their own.
void RISImporter::slotCancel() {
  m_cancelled = true;
}
|
|
|
|
|
|
|
|
#include "risimporter.moc"
|