You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tellico/src/translators/bibteximporter.cpp

355 lines
11 KiB

/***************************************************************************
copyright : (C) 2003-2006 by Robby Stephenson
email : robby@periapsis.org
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of version 2 of the GNU General Public License as *
* published by the Free Software Foundation; *
* *
***************************************************************************/
#include "bibteximporter.h"
#include "bibtexhandler.h"
#include "../collections/bibtexcollection.h"
#include "../entry.h"
#include "../latin1literal.h"
#include "../progressmanager.h"
#include "../filehandler.h"
#include "../tellico_debug.h"
#include <tdeapplication.h>
#include <tdeconfig.h>
#include <tqptrlist.h>
#include <tqregexp.h>
#include <tqlayout.h>
#include <tqvbuttongroup.h>
#include <tqradiobutton.h>
#include <tqwhatsthis.h>
#include <tqtextcodec.h>
using Tellico::Import::BibtexImporter;
// Creates an importer for the given list of URLs. No collection or option
// widget exists yet, the cancel flag starts cleared, and bt_initialize() is
// called before any parsing can happen.
BibtexImporter::BibtexImporter(const KURL::List& urls_)
    : Importer(urls_)
    , m_coll(0)
    , m_widget(0)
    , m_readUTF8(0)
    , m_readLocale(0)
    , m_cancelled(false)
{
  bt_initialize();
}
// Creates an importer for bibtex data supplied directly as text. Member
// state is set up exactly as in the URL-based constructor, and
// bt_initialize() is called before any parsing can happen.
BibtexImporter::BibtexImporter(const TQString& text_)
    : Importer(text_)
    , m_coll(0)
    , m_widget(0)
    , m_readUTF8(0)
    , m_readLocale(0)
    , m_cancelled(false)
{
  bt_initialize();
}
// Calls bt_cleanup() and, if the options widget was ever created, stores
// the state of the UTF-8 radio button in the "Import Options" config group.
BibtexImporter::~BibtexImporter() {
  bt_cleanup();

  // the radio button only exists once widget() has been called
  if(!m_readUTF8) {
    return;
  }

  const bool utf8Chosen = m_readUTF8->isChecked();
  TDEConfigGroup config(kapp->config(), "Import Options");
  config.writeEntry("Bibtex UTF8", utf8Chosen);
}
// Returns true only when the requested collection type is
// Data::Collection::Bibtex.
bool BibtexImporter::canImport(int type) const {
  const bool isBibtex = (Data::Collection::Bibtex == type);
  return isBibtex;
}
// Builds the bibtex collection from the importer's text and/or URLs and
// returns it. The result is cached in m_coll, so later calls return the
// same collection without re-reading anything.
//
// Returns 0 if the import was cancelled through the progress item.
// Invalid URLs, empty files and files without any valid entry are skipped;
// for the latter a status message naming the file is set.
Tellico::Data::CollPtr BibtexImporter::collection() {
  if(m_coll) {
    return m_coll;
  }

  ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true);
  item.setTotalSteps(urls().count() * 100);
  connect(&item, TQT_SIGNAL(signalCancelled(ProgressItem*)), TQT_SLOT(slotCancel()));
  ProgressItem::Done done(this);

  // the encoding choice only exists if the options widget was created
  bool useUTF8 = m_widget && m_readUTF8->isChecked();

  m_coll = new Data::BibtexCollection(true);

  // index of the source being read, used by readCollection() for progress
  int count = 0;
  // might be importing text only
  if(!text().isEmpty()) {
    TQString text = this->text();
    Data::CollPtr coll = readCollection(text, count);
    if(!coll || coll->entryCount() == 0) {
      setStatusMessage(i18n("No valid bibtex entries were found"));
    } else {
      m_coll->addEntries(coll->entries());
    }
  }

  // NOTE(review): when cancelled, 0 is returned but m_coll stays set, so a
  // later call would hand back the partially filled collection - confirm
  // whether callers rely on that.
  KURL::List urls = this->urls();
  for(KURL::List::ConstIterator it = urls.begin(); it != urls.end(); ++it, ++count) {
    if(m_cancelled) {
      return 0;
    }
    if(!(*it).isValid()) {
      continue;
    }
    TQString text = FileHandler::readTextFile(*it, false, useUTF8);
    if(text.isEmpty()) {
      continue;
    }
    Data::CollPtr coll = readCollection(text, count);
    if(!coll || coll->entryCount() == 0) {
      // report the file that is actually being read, not the importer's
      // primary url()
      setStatusMessage(i18n("No valid bibtex entries were found in file - %1").arg((*it).fileName()));
      continue;
    }
    m_coll->addEntries(coll->entries());
  }

  if(m_cancelled) {
    return 0;
  }

  return m_coll;
}
// Parses one chunk of bibtex text into a fresh BibtexCollection.
//
// text: the bibtex source to parse
// n:    index of the source being read; progress is reported as
//       n*100 + percentage of nodes handled
//
// Returns the new collection, or 0 when the text is empty, nothing could
// be parsed, or the import was cancelled. All AST nodes created by
// parseText() are freed before returning.
Tellico::Data::CollPtr BibtexImporter::readCollection(const TQString& text, int n) {
  if(text.isEmpty()) {
    myDebug() << "BibtexImporter::readCollection() - no text" << endl;
    return 0;
  }

  Data::CollPtr ptr = new Data::BibtexCollection(true);
  Data::BibtexCollection* c = static_cast<Data::BibtexCollection*>(ptr.data());

  parseText(text); // populates m_nodes

  if(m_nodes.isEmpty()) {
    return 0;
  }
  // No early return on cancel here: nodes parsed before the cancel must
  // still reach the clean-up loop below. The entry loop checks m_cancelled
  // itself, so a cancelled import falls straight through and returns 0.

  TQString str;
  const uint count = m_nodes.count();
  const uint stepSize = TQMAX(s_stepSize, count/100);
  const bool showProgress = options() & ImportProgress;

  uint j = 0;
  for(ASTListIterator it(m_nodes); !m_cancelled && it.current(); ++it, ++j) {
    // if we're parsing a macro string, comment or preamble, skip it for now
    if(bt_entry_metatype(it.current()) == BTE_PREAMBLE) {
      // NOTE(review): bt_get_text() may return a buffer owned by the
      // caller - confirm against the btparse docs and free it if so
      char* preamble = bt_get_text(it.current());
      if(preamble) {
        c->setPreamble(TQString::fromUtf8(preamble));
      }
      continue;
    }

    if(bt_entry_metatype(it.current()) == BTE_MACRODEF) {
      char* macro = 0;
      (void) bt_next_field(it.current(), 0, &macro);
      if(!macro) {
        // no field means no macro name to look up
        continue;
      }
      // FIXME: replace macros within macro definitions!
      // lookup lowercase macro in map
      c->addMacro(m_macros[TQString::fromUtf8(macro)], TQString::fromUtf8(bt_macro_text(macro, 0, 0)));
      continue;
    }

    if(bt_entry_metatype(it.current()) == BTE_COMMENT) {
      continue;
    }

    // now we're parsing a regular entry
    Data::EntryPtr entry = new Data::Entry(ptr);

    str = TQString::fromUtf8(bt_entry_type(it.current()));
//    kdDebug() << "entry type: " << str << endl;
    // text is automatically put into lower-case by btparse
    BibtexHandler::setFieldValue(entry, TQString::fromLatin1("entry-type"), str);

    str = TQString::fromUtf8(bt_entry_key(it.current()));
//    kdDebug() << "entry key: " << str << endl;
    BibtexHandler::setFieldValue(entry, TQString::fromLatin1("key"), str);

    char* name;
    AST* field = 0;
    while((field = bt_next_field(it.current(), field, &name))) {
//      kdDebug() << "\tfound: " << name << endl;
//      str = TQString::fromLatin1(bt_get_text(field));
      str.truncate(0);
      AST* value = 0;
      bt_nodetype type;
      char* svalue;
      // true while the last appended piece was a macro followed by '#'
      bool end_macro = false;
      while((value = bt_next_value(field, value, &type, &svalue))) {
        switch(type) {
          case BTAST_STRING:
          case BTAST_NUMBER:
            str += BibtexHandler::importText(svalue).simplifyWhiteSpace();
            end_macro = false;
            break;
          case BTAST_MACRO:
            str += TQString::fromUtf8(svalue) + '#';
            end_macro = true;
            break;
          default:
            break;
        }
      }
      if(end_macro) {
        // remove last character '#'
        str.truncate(str.length() - 1);
      }
      TQString fieldName = TQString::fromUtf8(name);
      // names are separated by " and " in the source; store them with "; "
      if(fieldName == Latin1Literal("author") || fieldName == Latin1Literal("editor")) {
        str.replace(TQRegExp(TQString::fromLatin1("\\sand\\s")), TQString::fromLatin1("; "));
      }
      BibtexHandler::setFieldValue(entry, fieldName, str);
    }

    ptr->addEntries(entry);

    if(showProgress && j%stepSize == 0) {
      ProgressManager::self()->setProgress(this, n*100 + 100*j/count);
      kapp->processEvents();
    }
  }

  if(m_cancelled) {
    ptr = 0;
  }

  // clean-up
  for(ASTListIterator it(m_nodes); it.current(); ++it) {
    bt_free_ast(it.current());
  }
  // the pointers are dangling now, do not keep them around
  m_nodes.clear();

  return ptr;
}
void BibtexImporter::parseText(const TQString& text) {
m_nodes.clear();
m_macros.clear();
ushort bt_options = 0; // ushort is defined in btparse.h
boolean ok; // boolean is defined in btparse.h as an int
// for regular nodes (entries), do NOT convert numbers to strings, do NOT expand macros
bt_set_stringopts(BTE_REGULAR, 0);
bt_set_stringopts(BTE_MACRODEF, 0);
// bt_set_stringopts(BTE_PREAMBLE, BTO_CONVERT | BTO_EXPAND);
TQString entry;
TQRegExp rx(TQString::fromLatin1("[{}]"));
TQRegExp macroName(TQString::fromLatin1("@string\\s*\\{\\s*(.*)="), false /*case sensitive*/);
macroName.setMinimal(true);
bool needsCleanup = false;
int brace = 0;
int startpos = 0;
int pos = text.find(rx, 0);
while(pos > 0 && !m_cancelled) {
if(text[pos] == '{') {
++brace;
} else if(text[pos] == '}' && brace > 0) {
--brace;
}
if(brace == 0) {
entry = text.mid(startpos, pos-startpos+1).stripWhiteSpace();
// All the downstream text processing on the AST node will assume utf-8
AST* node = bt_parse_entry_s(const_cast<char*>(entry.utf8().data()),
const_cast<char*>(url().fileName().local8Bit().data()),
0, bt_options, &ok);
if(ok && node) {
if(bt_entry_metatype(node) == BTE_MACRODEF && macroName.search(entry) > -1) {
char* macro;
(void) bt_next_field(node, 0, &macro);
m_macros.insert(TQString::fromUtf8(macro), macroName.cap(1).stripWhiteSpace());
}
m_nodes.append(node);
needsCleanup = true;
}
startpos = pos+1;
}
pos = text.find(rx, pos+1);
}
if(needsCleanup) {
// clean up some structures
bt_parse_entry_s(0, 0, 1, 0, 0);
}
}
void BibtexImporter::slotCancel() {
m_cancelled = true;
}
// Returns the import options widget, creating it on first use. The widget
// holds two radio buttons for choosing between UTF-8 and the locale
// encoding; the initial choice comes from the "Bibtex UTF8" entry of the
// "Import Options" config group and defaults to the locale encoding.
TQWidget* BibtexImporter::widget(TQWidget* parent_, const char* name_/*=0*/) {
  if(!m_widget) {
    m_widget = new TQWidget(parent_, name_);
    TQVBoxLayout* layout = new TQVBoxLayout(m_widget);

    TQButtonGroup* group = new TQVButtonGroup(i18n("Bibtex Options"), m_widget);

    m_readUTF8 = new TQRadioButton(i18n("Use Unicode (UTF-8) encoding"), group);
    TQWhatsThis::add(m_readUTF8, i18n("Read the imported file in Unicode (UTF-8)."));

    const TQString codecName = TQString::fromLatin1(TQTextCodec::codecForLocale()->name());
    m_readLocale = new TQRadioButton(i18n("Use user locale (%1) encoding").arg(codecName), group);
    m_readLocale->setChecked(true);
    TQWhatsThis::add(m_readLocale, i18n("Read the imported file in the local encoding."));

    // restore the choice saved by the destructor of an earlier importer
    TDEConfigGroup config(kapp->config(), "Import Options");
    if(!config.readBoolEntry("Bibtex UTF8", false)) {
      m_readLocale->setChecked(true);
    } else {
      m_readUTF8->setChecked(true);
    }

    layout->addWidget(group);
    layout->addStretch(1);
  }
  return m_widget;
}
bool BibtexImporter::maybeBibtex(const KURL& url_) {
TQString text = FileHandler::readTextFile(url_, true /*quiet*/);
if(text.isEmpty()) {
return false;
}
bt_initialize();
TQRegExp rx(TQString::fromLatin1("[{}]"));
ushort bt_options = 0; // ushort is defined in btparse.h
boolean ok; // boolean is defined in btparse.h as an int
bool foundOne = false;
int brace = 0;
int startpos = 0;
int pos = text.find(rx, 0);
while(pos > 0) {
if(text[pos] == '{') {
++brace;
} else if(text[pos] == '}' && brace > 0) {
--brace;
}
if(brace == 0) {
TQString entry = text.mid(startpos, pos-startpos+1).stripWhiteSpace();
// All the downstream text processing on the AST node will assume utf-8
AST* node = bt_parse_entry_s(const_cast<char*>(entry.utf8().data()),
const_cast<char*>(url_.fileName().local8Bit().data()),
0, bt_options, &ok);
if(ok && node) {
foundOne = true;
break;
}
startpos = pos+1;
}
pos = text.find(rx, pos+1);
}
if(foundOne) {
// clean up some structures
bt_parse_entry_s(0, 0, 1, 0, 0);
}
bt_cleanup();
return foundOne;
}
#include "bibteximporter.moc"