You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdepim/indexlib/main.cpp

212 lines
5.8 KiB

/* This file is part of indexlib.
* Copyright (C) 2005 Luís Pedro Coelho <luis@luispedro.org>
*
* Indexlib is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation and available as file
* GPL_V2 which is distributed along with indexlib.
*
* Indexlib is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA
*
* In addition, as a special exception, the copyright holders give
* permission to link the code of this program with any edition of
* the TQt library by Trolltech AS, Norway (or with modified versions
* of TQt that use the same license as TQt), and distribute linked
* combinations including the two. You must obey the GNU General
* Public License in all respects for all of the code used other than
* TQt. If you modify this file, you may extend this exception to
* your version of the file, but you are not obligated to do so. If
* you do not wish to do so, delete this exception statement from
* your version.
*/
#include "stringarray.h"
#include "leafdata.h"
#include "manager.h"
#include "mmap_manager.h"
#include "mempool.h"
#include "compressed.h"
#include "create.h"
#include "tokenizer.h"
#include <sstream>
#include <map>
#include <iostream>
#include <cstdlib>
#include <string>
#include <fstream>
#include <memory>
#include <string.h>
typedef std::unique_ptr<indexlib::index> index_smart;
index_smart get_index( std::string name ) {
return indexlib::open( name.c_str(), indexlib::open_flags::create_quotes );
}
// Drains `in` one character at a time and returns everything that was read.
// Reading stops at the first get() that fails (end of input or a stream that
// is already in a failed state), so a failed stream yields an empty string.
std::string read_stream( std::istream& in ) {
    std::string contents;
    for ( char ch; in.get( ch ); ) {
        contents += ch;
    }
    return contents;
}
// Returns the complete contents of `file`. The special name "-" selects
// standard input instead of a file on disk.
// The result of opening the file is not checked here; the stream is passed
// straight to read_stream().
std::string read_string( std::string file ) {
    if ( file != "-" ) {
        std::ifstream source( file.c_str() );
        return read_stream( source );
    }
    return read_stream( std::cin );
}
// Writes the command-line synopsis to standard output: the program name
// (argv[0]) followed by " cmd [index]", then one tab-indented line per key of
// `commands` in the map's iteration order, then a blank line and a flush.
// `argc` is accepted for signature symmetry with the handlers but is not read.
void usage( int argc, char* argv[], const std::map<std::string, int (*)( int, char** )>& commands ) {
    std::cout << argv[ 0 ] << " cmd [index]\n" << "Possible Commands:\n\n";
    for ( const auto& entry : commands ) {
        std::cout << '\t' << entry.first << '\n';
    }
    std::cout << std::endl;
}
// Diagnostic command. argv[2] names the debug function, argv[3] is its argument:
//   print.sa          builds a stringarray from the argument and prints it
//   print.compressed  builds a compressed_file from the argument and prints it
//   break_up          reads the file named by the argument, splits it into
//                     words with the "latin-1:european" tokenizer and prints
//                     one word per line
// Returns 0 on success and 1 when arguments are missing, the tokenizer cannot
// be obtained, or the function name is not recognised.
int debug( int argc, char* argv[] ) {
    using namespace indexlib;
    using namespace indexlib::detail;
    // main() only guarantees argc >= 3, so argv[3] may be the terminating null
    // pointer; constructing a std::string from it would be undefined behaviour.
    if ( argc < 4 ) {
        std::cerr << "Debug function and argument are required\n";
        return 1;
    }
    std::string type = argv[ 2 ];
    std::string argument = argv[ 3 ];
    if ( type == "print.sa" ) {
        //nolog();
        std::cout << "stringarray:\n";
        stringarray sa( argument );
        sa.print( std::cout );
    } else if ( type == "print.compressed" ) {
        // The file object is constructed before nolog() is called; keep that order.
        compressed_file file( argument );
        nolog();
        std::cout << "compressed_file:\n";
        file.print( std::cout );
    } else if ( type == "break_up" ) {
        std::unique_ptr<tokenizer> tok = get_tokenizer( "latin-1:european" );
        if ( !tok ) {
            std::cerr << "Could not get tokenizer\n";
            return 1;
        }
        nolog();
        // Slurp the whole input file into memory before tokenizing it.
        std::ostringstream whole_str;
        whole_str << std::ifstream( argument.c_str() ).rdbuf();
        std::vector<std::string> words = tok->string_to_words( whole_str.str().c_str() );
        for ( const std::string& word : words ) {
            std::cout << word << '\n';
        }
    } else {
        std::cerr << "Unknown function\n";
        return 1;
    }
    return 0;
}
int remove_doc( int argc, char* argv[] ) {
if ( argc < 4 ) {
std::cerr << "Filename argument for remove_doc is required\n";
return 1;
}
index_smart t = get_index( argv[ 2 ] );
t->remove_doc( argv[ 3 ] );
return 0;
}
int maintenance( int argc, char* argv[] ) {
index_smart t = get_index( argv[ 2 ] );
t->maintenance();
return 0;
}
int add( int argc, char* argv[] ) {
if ( argc < 4 ) {
std::cerr <<
"Input file argument is required\n"
"Name is optional (defaults to filename)\n";
return 1;
}
index_smart t = get_index( argv[ 2 ] );
std::string input;
if ( argv[ 4 ] ) input = argv[ 4 ];
else input = argv[ 3 ];
t->add( read_string( input ), argv[ 3 ] );
return 0;
}
int search( int argc, char* argv[] ) {
if ( argc < 4 ) {
std::cerr << "Search string is required\n";
return 1;
}
index_smart t = get_index( argv[ 2 ] );
std::vector<unsigned> files = t->search( argv[ 3 ] )->list();
for ( std::vector<unsigned>::const_iterator first = files.begin(), past = files.end();
first != past; ++first ) {
std::cout << t->lookup_docname( *first ) << std::endl;
}
return 0;
}
int list( int argc, char* argv[] ) {
index_smart t = get_index( argv[ 2 ] );
unsigned ndocs = t->ndocs();
for ( unsigned i = 0; i != ndocs; ++i ) {
std::cout << t->lookup_docname( i ) << std::endl;
}
return 0;
}
// Command handler: removes the index named by argv[2] via indexlib::remove().
// Returns 0 explicitly: the function is declared to return int and its value
// becomes the process exit status, so flowing off the end would be undefined
// behaviour.
int remove( int argc, char* argv[] ) {
    indexlib::remove( argv[ 2 ] );
    return 0;
}
// Program entry point. Expects "<program> cmd index [args...]": argv[1] selects
// one of the registered command handlers, which receives the untouched
// argc/argv. With fewer than two arguments the usage text is printed and the
// program exits with status 0. An unrecognised command, or any exception that
// escapes a handler, is reported on stderr and yields exit status 1.
int main( int argc, char* argv[] ) try {
    //nolog();
    typedef int ( *handler_fn )( int, char*[] );
    std::map<std::string, handler_fn> handlers;
    handlers[ "debug" ] = &debug;
    handlers[ "remove" ] = &remove;
    handlers[ "remove_doc" ] = &remove_doc;
    handlers[ "maintenance" ] = &maintenance;
    handlers[ "add" ] = &add;
    handlers[ "search" ] = &search;
    handlers[ "list" ] = &list;
    if ( argc < 3 ) {
        usage( argc, argv, handlers );
        return 0;
    }
    // Look the command up with find() so an unknown name does not insert a
    // null entry into the table as operator[] would.
    const std::map<std::string, handler_fn>::const_iterator found = handlers.find( argv[ 1 ] );
    if ( found == handlers.end() ) {
        std::cerr << "Unknown command: " << argv[ 1 ] << std::endl;
        return 1;
    }
    return found->second( argc, argv );
} catch ( const char* msg ) {
    // Plain C-string exceptions are handled separately from std::exception.
    std::cerr << "Error: " << msg << std::endl;
    return 1;
} catch ( const std::exception& e ) {
    std::cerr << "Std Error: " << e.what() << std::endl;
    return 1;
} catch ( ... ) {
    std::cerr << "Some Unspecified error\n";
    return 1;
}