You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2318 lines
56 KiB
2318 lines
56 KiB
15 years ago
|
/* This file is part of the KDE libraries
   Copyright (C) 2000 Fritz Elfert <fritz@kde.org>
   Copyright (C) 2004 Allan Sandfeld Jensen <kde@carewolf.com>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License version 2 as published by the Free Software Foundation.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public License
   along with this library; see the file COPYING.LIB. If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.
*/
#include "kmimemagic.h"
|
||
|
#include <kdebug.h>
|
||
|
#include <kapplication.h>
|
||
|
#include <qfile.h>
|
||
|
#include <ksimpleconfig.h>
|
||
|
#include <kstandarddirs.h>
|
||
|
#include <kstaticdeleter.h>
|
||
|
#include <klargefile.h>
|
||
|
#include <assert.h>
|
||
|
|
||
|
static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
|
||
|
static void process(struct config_rec* conf, const QString &);
|
||
|
static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
|
||
|
static int tagmagic(unsigned char *buf, int nbytes);
|
||
|
static int textmagic(struct config_rec* conf, unsigned char *, int);
|
||
|
|
||
|
static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
|
||
|
static int match(struct config_rec* conf, unsigned char *, int);
|
||
|
|
||
|
KMimeMagic* KMimeMagic::s_pSelf;
|
||
|
static KStaticDeleter<KMimeMagic> kmimemagicsd;
|
||
|
|
||
|
/**
 * Returns the shared KMimeMagic instance, creating it through
 * initStatic() the first time it is requested.
 */
KMimeMagic *KMimeMagic::self()
{
    if (s_pSelf == 0) {
        initStatic();
    }
    return s_pSelf;
}

void KMimeMagic::initStatic()
|
||
|
{
|
||
|
s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
|
||
|
s_pSelf->setFollowLinks( true );
|
||
|
}
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <unistd.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <sys/wait.h>
|
||
|
#include <sys/types.h>
|
||
|
#include <sys/stat.h>
|
||
|
#include <fcntl.h>
|
||
|
#include <errno.h>
|
||
|
#include <ctype.h>
|
||
|
#include <time.h>
|
||
|
#include <utime.h>
|
||
|
#include <stdarg.h>
|
||
|
#include <qregexp.h>
|
||
|
#include <qstring.h>
|
||
|
|
||
|
//#define MIME_MAGIC_DEBUG_TABLE // untested

// Uncomment to debug the config-file parsing phase
//#define DEBUG_APPRENTICE
// Uncomment to debug the matching phase
//#define DEBUG_MIMEMAGIC

// Either debug switch also records the magic-file line number of each entry.
#if defined(DEBUG_MIMEMAGIC) || defined(DEBUG_APPRENTICE)
#  define DEBUG_LINENUMBERS
#endif

/*
 * Builtin MIME types
 */
#define MIME_BINARY_UNKNOWN    "application/octet-stream"
#define MIME_BINARY_UNREADABLE "application/x-unreadable"
#define MIME_BINARY_ZEROSIZE   "application/x-zerosize"
#define MIME_TEXT_UNKNOWN      "text/plain"
#define MIME_TEXT_PLAIN        "text/plain"
#define MIME_INODE_DIR         "inode/directory"
#define MIME_INODE_CDEV        "inode/chardevice"
#define MIME_INODE_BDEV        "inode/blockdevice"
#define MIME_INODE_FIFO        "inode/fifo"
#define MIME_INODE_LINK        "inode/link"
#define MIME_INODE_SOCK        "inode/socket"
// Following should go in magic-file - Fritz
#define MIME_APPL_TROFF        "application/x-troff"
#define MIME_APPL_TAR          "application/x-tar"
#define MIME_TEXT_FORTRAN      "text/x-fortran"

#define MAXMIMESTRING 256

#define HOWMANY   4000 /* big enough to recognize most WWW files, and skip GPL-headers */
#define MAXDESC   50   /* max length of text description */
#define MAXstring 64   /* max length of "string" types */

typedef union VALUETYPE {
|
||
|
unsigned char b;
|
||
|
unsigned short h;
|
||
|
unsigned long l;
|
||
|
char s[MAXstring];
|
||
|
unsigned char hs[2]; /* 2 bytes of a fixed-endian "short" */
|
||
|
unsigned char hl[4]; /* 2 bytes of a fixed-endian "long" */
|
||
|
} VALUETYPE;
|
||
|
|
||
|
struct magic {
|
||
|
struct magic *next; /* link to next entry */
|
||
|
#ifdef DEBUG_LINENUMBERS
|
||
|
int lineno; /* line number from magic file - doesn't say from which one ;) */
|
||
|
#endif
|
||
|
|
||
|
short flag;
|
||
|
#define INDIR 1 /* if '>(...)' appears, */
|
||
|
#define UNSIGNED 2 /* comparison is unsigned */
|
||
|
short cont_level; /* level of ">" */
|
||
|
struct {
|
||
|
char type; /* byte short long */
|
||
|
long offset; /* offset from indirection */
|
||
|
} in;
|
||
|
long offset; /* offset to magic number */
|
||
|
unsigned char reln; /* relation (0=eq, '>'=gt, etc) */
|
||
|
char type; /* int, short, long or string. */
|
||
|
char vallen; /* length of string value, if any */
|
||
|
#define BYTE 1
|
||
|
#define SHORT 2
|
||
|
#define LONG 4
|
||
|
#define STRING 5
|
||
|
#define DATE 6
|
||
|
#define BESHORT 7
|
||
|
#define BELONG 8
|
||
|
#define BEDATE 9
|
||
|
#define LESHORT 10
|
||
|
#define LELONG 11
|
||
|
#define LEDATE 12
|
||
|
VALUETYPE value; /* either number or string */
|
||
|
unsigned long mask; /* mask before comparison with value */
|
||
|
char nospflag; /* suppress space character */
|
||
|
|
||
|
/* NOTE: this string is suspected of overrunning - find it! */
|
||
|
char desc[MAXDESC]; /* description */
|
||
|
};
|
||
|
|
||
|
/*
 * data structures for tar file recognition
 * --------------------------------------------------------------------------
 * Header file for public domain tar (tape archive) program.
 *
 * @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John
 * Gilmore, ihnp4!hoptoad!gnu.
 *
 * Header block on tape.
 *
 * I'm going to use traditional DP naming conventions here. A "block" is a big
 * chunk of stuff that we do I/O on. A "record" is a piece of info that we
 * care about. Typically many "record"s fit into a "block".
 */
#define RECORDSIZE 512
#define NAMSIZ     100
#define TUNMLEN    32
#define TGNMLEN    32

/* One tar record: either raw bytes or the header field layout. */
union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};

/*
 * The magic field is filled with this if uname and gname are valid.
 * The value is "ustar" followed by TWO spaces: 7 characters plus the
 * terminating NUL, which exactly fills the 8-byte header.magic field.
 * (A single space would make the literal one byte short of what the
 * comment and the field size require.)
 */
#define TMAGIC "ustar  " /* 7 chars and a null */

/*
 * file-function prototypes
 */
static int            is_tar(unsigned char *, int);
static unsigned long  signextend(struct magic *, unsigned long);
static int            getvalue(struct magic *, char **);
static int            hextoint(int);
static char          *getstr(char *, char *, int, int *);
static int            mget(union VALUETYPE *, unsigned char *, struct magic *, int);
static int            mcheck(union VALUETYPE *, struct magic *);
static int            mconvert(union VALUETYPE *, struct magic *);
static long           from_oct(int, char *);

/*
 * includes for ASCII substring recognition formerly "names.h" in file
 * command
 *
 * Original notes: names and types used by ascmagic in file(1).
 * These tokens are
 * here because they can appear anywhere in the first HOWMANY bytes, while
 * tokens in /etc/magic must appear at fixed offsets into the file. Don't
 * make HOWMANY too high unless you have a very fast CPU.
 */

/* these types are used to calculate index to 'types': keep em in sync! */
/* HTML inserted in first because this is a web server module now */
/* ENG removed because stupid */
#define L_HTML  0x001   /* HTML */
#define L_C     0x002   /* first and foremost on UNIX */
#define L_MAKE  0x004   /* Makefiles */
#define L_PLI   0x008   /* PL/1 */
#define L_MACH  0x010   /* some kinda assembler */
#define L_PAS   0x020   /* Pascal */
#define L_JAVA  0x040   /* Java source */
#define L_CPP   0x080   /* C++ */
#define L_MAIL  0x100   /* Electronic mail */
#define L_NEWS  0x200   /* Usenet Netnews */
#define L_DIFF  0x400   /* Output of diff */
#define L_OBJC  0x800   /* Objective C */

// Note: this is not a type, it's just used to mark items that should count more
#define FLAG_STRONG 0x1000

/* Bit position of each L_* flag above */
#define P_HTML   0      /* HTML */
#define P_C      1      /* first and foremost on UNIX */
#define P_MAKE   2      /* Makefiles */
#define P_PLI    3      /* PL/1 */
#define P_MACH   4      /* some kinda assembler */
#define P_PAS    5      /* Pascal */
#define P_JAVA   6      /* Java source */
#define P_CPP    7      /* C++ */
#define P_MAIL   8      /* Electronic mail */
#define P_NEWS   9      /* Usenet Netnews */
#define P_DIFF  10      /* Output of diff */
#define P_OBJC  11      /* Objective C */

/* One text type: its MIME name, a keyword count and a weight. */
struct asc_type {
    const char *type;
    int         kwords;
    double      weight;
};

/* Entries are in the same order as the P_* indices. */
static const asc_type types[] = {
    { "text/html",        19, 2   }, // 10 items but 10 different words only
    { "text/x-c",         13, 1   },
    { "text/x-makefile",   4, 1.9 },
    { "text/x-pli",        1, 3   },
    { "text/x-assembler",  6, 2.1 },
    { "text/x-pascal",     1, 1   },
    { "text/x-java",      12, 1   },
    { "text/x-c++",       19, 1   },
    { "message/rfc822",    4, 1.9 },
    { "message/news",      3, 2   },
    { "text/x-diff",       4, 2   },
    { "text/x-objc",      10, 1   }
};

#define NTYPES (sizeof(types)/sizeof(asc_type))

static struct names {
|
||
|
const char *name;
|
||
|
short type;
|
||
|
} const names[] = {
|
||
|
{
|
||
|
"<html", L_HTML | FLAG_STRONG
|
||
|
},
|
||
|
{
|
||
|
"<HTML", L_HTML | FLAG_STRONG
|
||
|
},
|
||
|
{
|
||
|
"<head", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<HEAD", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<body", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<BODY", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<title", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<TITLE", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<h1", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<H1", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<a", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<A", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<img", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<IMG", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<!--", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<!doctype", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<!DOCTYPE", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<div", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<DIV", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<frame", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<FRAME", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<frameset", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<FRAMESET", L_HTML
|
||
|
},
|
||
|
{
|
||
|
"<script", L_HTML | FLAG_STRONG
|
||
|
},
|
||
|
{
|
||
|
"<SCRIPT", L_HTML | FLAG_STRONG
|
||
|
},
|
||
|
{
|
||
|
"/*", L_C|L_CPP|L_JAVA|L_OBJC
|
||
|
},
|
||
|
{
|
||
|
"//", L_C|L_CPP|L_JAVA|L_OBJC
|
||
|
},
|
||
|
{
|
||
|
"#include", L_C|L_CPP
|
||
|
},
|
||
|
{
|
||
|
"#ifdef", L_C|L_CPP
|
||
|
},
|
||
|
{
|
||
|
"#ifndef", L_C|L_CPP
|
||
|
},
|
||
|
{
|
||
|
"bool", L_C|L_CPP
|
||
|
},
|
||
|
{
|
||
|
"char", L_C|L_CPP|L_JAVA|L_OBJC
|
||
|
},
|
||
|
{
|
||
|
"int", L_C|L_CPP|L_JAVA|L_OBJC
|
||
|
},
|
||
|
{
|
||
|
"float", L_C|L_CPP|L_JAVA|L_OBJC
|
||
|
},
|
||
|
{
|
||
|
"void", L_C|L_CPP|L_JAVA|L_OBJC
|
||
|
},
|
||
|
{
|
||
|
"extern", L_C|L_CPP
|
||
|
},
|
||
|
{
|
||
|
"struct", L_C|L_CPP
|
||
|
},
|
||
|
{
|
||
|
"union", L_C|L_CPP
|
||
|
},
|
||
|
{
|
||
|
"implements", L_JAVA
|
||
|
},
|
||
|
{
|
||
|
"super", L_JAVA
|
||
|
},
|
||
|
{
|
||
|
"import", L_JAVA
|
||
|
},
|
||
|
{
|
||
|
"class", L_CPP|L_JAVA
|
||
|
},
|
||
|
{
|
||
|
"public", L_CPP|L_JAVA
|
||
|
},
|
||
|
{
|
||
|
"private", L_CPP|L_JAVA
|
||
|
},
|
||
|
{
|
||
|
"explicit", L_CPP
|
||
|
},
|
||
|
{
|
||
|
"virtual", L_CPP
|
||
|
},
|
||
|
{
|
||
|
"namespace", L_CPP
|
||
|
},
|
||
|
{
|
||
|
"#import", L_OBJC
|
||
|
},
|
||
|
{
|
||
|
"@interface", L_OBJC
|
||
|
},
|
||
|
{
|
||
|
"@implementation", L_OBJC
|
||
|
},
|
||
|
{
|
||
|
"@protocol", L_OBJC
|
||
|
},
|
||
|
{
|
||
|
"CFLAGS", L_MAKE
|
||
|
},
|
||
|
{
|
||
|
"LDFLAGS", L_MAKE
|
||
|
},
|
||
|
{
|
||
|
"all:", L_MAKE
|
||
|
},
|
||
|
{
|
||
|
".PHONY:", L_MAKE
|
||
|
},
|
||
|
{
|
||
|
"srcdir", L_MAKE
|
||
|
},
|
||
|
{
|
||
|
"exec_prefix", L_MAKE
|
||
|
},
|
||
|
/*
|
||
|
* Too many files of text have these words in them. Find another way
|
||
|
* to recognize Fortrash.
|
||
|
*/
|
||
|
{
|
||
|
".ascii", L_MACH
|
||
|
},
|
||
|
{
|
||
|
".asciiz", L_MACH
|
||
|
},
|
||
|
{
|
||
|
".byte", L_MACH
|
||
|
},
|
||
|
{
|
||
|
".even", L_MACH
|
||
|
},
|
||
|
{
|
||
|
".globl", L_MACH
|
||
|
},
|
||
|
{
|
||
|
"clr", L_MACH
|
||
|
},
|
||
|
{
|
||
|
"(input", L_PAS
|
||
|
},
|
||
|
{
|
||
|
"dcl", L_PLI
|
||
|
},
|
||
|
{
|
||
|
"Received:", L_MAIL
|
||
|
},
|
||
|
/* we now stop at '>' for tokens, so this one won't work {
|
||
|
">From", L_MAIL
|
||
|
},*/
|
||
|
{
|
||
|
"Return-Path:", L_MAIL
|
||
|
},
|
||
|
{
|
||
|
"Cc:", L_MAIL
|
||
|
},
|
||
|
{
|
||
|
"Newsgroups:", L_NEWS
|
||
|
},
|
||
|
{
|
||
|
"Path:", L_NEWS
|
||
|
},
|
||
|
{
|
||
|
"Organization:", L_NEWS
|
||
|
},
|
||
|
{
|
||
|
"---", L_DIFF
|
||
|
},
|
||
|
{
|
||
|
"+++", L_DIFF
|
||
|
},
|
||
|
{
|
||
|
"***", L_DIFF
|
||
|
},
|
||
|
{
|
||
|
"@@", L_DIFF
|
||
|
},
|
||
|
{
|
||
|
NULL, 0
|
||
|
}
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* Configuration for the utime() problem.
|
||
|
* Here's the problem:
|
||
|
* By looking into a file to determine its mimetype, we change its "last access"
|
||
|
* time (atime) and this can have side effects, like files in /tmp never being
|
||
|
* cleaned up because of that. So in temp directories, we restore the atime.
|
||
|
* Since this changes the ctime (last change of attributes), we don't do that
|
||
|
* anywhere else, because that breaks archiving programs, that check the ctime.
|
||
|
* Hence this class, to configure the directories where the atime should be restored.
|
||
|
*/
|
||
|
class KMimeMagicUtimeConf
|
||
|
{
|
||
|
public:
|
||
|
KMimeMagicUtimeConf()
|
||
|
{
|
||
|
tmpDirs << QString::fromLatin1("/tmp"); // default value
|
||
|
|
||
|
// The trick is that we also don't want the user to override globally set
|
||
|
// directories. So we have to misuse KStandardDirs :}
|
||
|
QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
|
||
|
if ( !confDirs.isEmpty() )
|
||
|
{
|
||
|
QString globalConf = confDirs.last() + "kmimemagicrc";
|
||
|
if ( QFile::exists( globalConf ) )
|
||
|
{
|
||
|
KSimpleConfig cfg( globalConf );
|
||
|
cfg.setGroup( "Settings" );
|
||
|
tmpDirs = cfg.readListEntry( "atimeDirs" );
|
||
|
}
|
||
|
if ( confDirs.count() > 1 )
|
||
|
{
|
||
|
QString localConf = confDirs.first() + "kmimemagicrc";
|
||
|
if ( QFile::exists( localConf ) )
|
||
|
{
|
||
|
KSimpleConfig cfg( localConf );
|
||
|
cfg.setGroup( "Settings" );
|
||
|
tmpDirs += cfg.readListEntry( "atimeDirs" );
|
||
|
}
|
||
|
}
|
||
|
for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
|
||
|
{
|
||
|
QString dir = *it;
|
||
|
if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
|
||
|
(*it) += '/';
|
||
|
}
|
||
|
}
|
||
|
#if 0
|
||
|
// debug code
|
||
|
for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
|
||
|
kdDebug(7018) << " atimeDir: " << *it << endl;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
bool restoreAccessTime( const QString & file ) const
|
||
|
{
|
||
|
QString dir = file.left( file.findRev( '/' ) );
|
||
|
bool res = tmpDirs.contains( dir );
|
||
|
//kdDebug(7018) << "restoreAccessTime " << file << " dir=" << dir << " result=" << res << endl;
|
||
|
return res;
|
||
|
}
|
||
|
QStringList tmpDirs;
|
||
|
};
|
||
|
|
||
|
/* current config */
|
||
|
struct config_rec {
|
||
|
bool followLinks;
|
||
|
QString resultBuf;
|
||
|
int accuracy;
|
||
|
|
||
|
struct magic *magic, /* head of magic config list */
|
||
|
*last;
|
||
|
KMimeMagicUtimeConf * utimeConf;
|
||
|
};
|
||
|
|
||
|
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug helper (only built with MIME_MAGIC_DEBUG_TABLE): walks the magic
 * list and reports the first entry whose pointer value looks like four
 * printable characters, i.e. a pointer that was overwritten by text.
 *
 * NOTE(review): `conf` is not declared in this static function; it has to be
 * provided by the enclosing scope before this debug path can build - confirm.
 */
static void
test_table()
{
    struct magic *m;
    struct magic *prevm = NULL;

    kdDebug(7018) << "test_table : started" << endl;
    for (m = conf->magic; m; m = m->next) {
        const unsigned long addr = (unsigned long) m;
        const int b3 = (int) ((addr >> 24) & 255);
        const int b2 = (int) ((addr >> 16) & 255);
        const int b1 = (int) ((addr >> 8) & 255);
        const int b0 = (int) (addr & 255);

        if (isprint(b3) && isprint(b2) && isprint(b1) && isprint(b0)) {
            /* The original debug() call was commented out but its argument
             * list was left behind; report through kdDebug instead. */
            kdDebug(7018) << "test_table: POINTER CLOBBERED! m=\""
                          << (char) b3 << (char) b2 << (char) b1 << (char) b0 << "\""
#ifdef DEBUG_LINENUMBERS
                          << " line=" << (prevm ? prevm->lineno : -1)
#endif
                          << endl;
            break;
        }
        prevm = m;
    }
}
#endif

/* Advance the scan pointer `l` past any run of ASCII whitespace. */
#define EATAB { \
        while (isascii((unsigned char) *l) && isspace((unsigned char) *l)) \
            ++l; \
    }

int KMimeMagic::parse_line(char *line, int *rule, int lineno)
|
||
|
{
|
||
|
int ws_offset;
|
||
|
|
||
|
/* delete newline */
|
||
|
if (line[0]) {
|
||
|
line[strlen(line) - 1] = '\0';
|
||
|
}
|
||
|
/* skip leading whitespace */
|
||
|
ws_offset = 0;
|
||
|
while (line[ws_offset] && isspace(line[ws_offset])) {
|
||
|
ws_offset++;
|
||
|
}
|
||
|
|
||
|
/* skip blank lines */
|
||
|
if (line[ws_offset] == 0) {
|
||
|
return 0;
|
||
|
}
|
||
|
/* comment, do not parse */
|
||
|
if (line[ws_offset] == '#')
|
||
|
return 0;
|
||
|
|
||
|
/* if we get here, we're going to use it so count it */
|
||
|
(*rule)++;
|
||
|
|
||
|
/* parse it */
|
||
|
return (parse(line + ws_offset, lineno) != 0);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* apprentice - load configuration from the magic file.
|
||
|
*/
|
||
|
int KMimeMagic::apprentice( const QString& magicfile )
|
||
|
{
|
||
|
FILE *f;
|
||
|
char line[BUFSIZ + 1];
|
||
|
int errs = 0;
|
||
|
int lineno;
|
||
|
int rule = 0;
|
||
|
QCString fname;
|
||
|
|
||
|
if (magicfile.isEmpty())
|
||
|
return -1;
|
||
|
fname = QFile::encodeName(magicfile);
|
||
|
f = fopen(fname, "r");
|
||
|
if (f == NULL) {
|
||
|
kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
/* parse it */
|
||
|
for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
|
||
|
if (parse_line(line, &rule, lineno))
|
||
|
errs++;
|
||
|
|
||
|
fclose(f);
|
||
|
|
||
|
#ifdef DEBUG_APPRENTICE
|
||
|
kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
|
||
|
kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
|
||
|
#endif
|
||
|
|
||
|
#ifdef MIME_MAGIC_DEBUG_TABLE
|
||
|
test_table();
|
||
|
#endif
|
||
|
|
||
|
return (errs ? -1 : 0);
|
||
|
}
|
||
|
|
||
|
int KMimeMagic::buff_apprentice(char *buff)
|
||
|
{
|
||
|
char line[BUFSIZ + 2];
|
||
|
int errs = 0;
|
||
|
int lineno = 1;
|
||
|
char *start = buff;
|
||
|
char *end;
|
||
|
int count = 0;
|
||
|
int rule = 0;
|
||
|
int len = strlen(buff) + 1;
|
||
|
|
||
|
/* parse it */
|
||
|
do {
|
||
|
count = (len > BUFSIZ-1)?BUFSIZ-1:len;
|
||
|
strncpy(line, start, count);
|
||
|
line[count] = '\0';
|
||
|
if ((end = strchr(line, '\n'))) {
|
||
|
*(++end) = '\0';
|
||
|
count = strlen(line);
|
||
|
} else
|
||
|
strcat(line, "\n");
|
||
|
start += count;
|
||
|
len -= count;
|
||
|
if (parse_line(line, &rule, lineno))
|
||
|
errs++;
|
||
|
lineno++;
|
||
|
} while (len > 0);
|
||
|
|
||
|
#ifdef DEBUG_APPRENTICE
|
||
|
kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
|
||
|
kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
|
||
|
#endif
|
||
|
|
||
|
#ifdef MIME_MAGIC_DEBUG_TABLE
|
||
|
test_table();
|
||
|
#endif
|
||
|
|
||
|
return (errs ? -1 : 0);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* extend the sign bit if the comparison is to be signed
|
||
|
*/
|
||
|
static unsigned long
|
||
|
signextend(struct magic *m, unsigned long v)
|
||
|
{
|
||
|
if (!(m->flag & UNSIGNED))
|
||
|
switch (m->type) {
|
||
|
/*
|
||
|
* Do not remove the casts below. They are vital.
|
||
|
* When later compared with the data, the sign
|
||
|
* extension must have happened.
|
||
|
*/
|
||
|
case BYTE:
|
||
|
v = (char) v;
|
||
|
break;
|
||
|
case SHORT:
|
||
|
case BESHORT:
|
||
|
case LESHORT:
|
||
|
v = (short) v;
|
||
|
break;
|
||
|
case DATE:
|
||
|
case BEDATE:
|
||
|
case LEDATE:
|
||
|
case LONG:
|
||
|
case BELONG:
|
||
|
case LELONG:
|
||
|
v = (long) v;
|
||
|
break;
|
||
|
case STRING:
|
||
|
break;
|
||
|
default:
|
||
|
kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
|
||
|
return 998; //good value
|
||
|
}
|
||
|
return v;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* parse one line from magic file, put into magic[index++] if valid
|
||
|
*/
|
||
|
int KMimeMagic::parse(char *l, int
|
||
|
#ifdef DEBUG_LINENUMBERS
|
||
|
lineno
|
||
|
#endif
|
||
|
)
|
||
|
{
|
||
|
int i = 0;
|
||
|
struct magic *m;
|
||
|
char *t,
|
||
|
*s;
|
||
|
/* allocate magic structure entry */
|
||
|
if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
|
||
|
kdError(7018) << "parse: Out of memory." << endl;
|
||
|
return -1;
|
||
|
}
|
||
|
/* append to linked list */
|
||
|
m->next = NULL;
|
||
|
if (!conf->magic || !conf->last) {
|
||
|
conf->magic = conf->last = m;
|
||
|
} else {
|
||
|
conf->last->next = m;
|
||
|
conf->last = m;
|
||
|
}
|
||
|
|
||
|
/* set values in magic structure */
|
||
|
m->flag = 0;
|
||
|
m->cont_level = 0;
|
||
|
#ifdef DEBUG_LINENUMBERS
|
||
|
m->lineno = lineno;
|
||
|
#endif
|
||
|
|
||
|
while (*l == '>') {
|
||
|
++l; /* step over */
|
||
|
m->cont_level++;
|
||
|
}
|
||
|
|
||
|
if (m->cont_level != 0 && *l == '(') {
|
||
|
++l; /* step over */
|
||
|
m->flag |= INDIR;
|
||
|
}
|
||
|
/* get offset, then skip over it */
|
||
|
m->offset = (int) strtol(l, &t, 0);
|
||
|
if (l == t) {
|
||
|
kdError(7018) << "parse: offset " << l << " invalid" << endl;
|
||
|
}
|
||
|
l = t;
|
||
|
|
||
|
if (m->flag & INDIR) {
|
||
|
m->in.type = LONG;
|
||
|
m->in.offset = 0;
|
||
|
/*
|
||
|
* read [.lbs][+-]nnnnn)
|
||
|
*/
|
||
|
if (*l == '.') {
|
||
|
switch (*++l) {
|
||
|
case 'l':
|
||
|
m->in.type = LONG;
|
||
|
break;
|
||
|
case 's':
|
||
|
m->in.type = SHORT;
|
||
|
break;
|
||
|
case 'b':
|
||
|
m->in.type = BYTE;
|
||
|
break;
|
||
|
default:
|
||
|
kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
|
||
|
break;
|
||
|
}
|
||
|
l++;
|
||
|
}
|
||
|
s = l;
|
||
|
if (*l == '+' || *l == '-')
|
||
|
l++;
|
||
|
if (isdigit((unsigned char) *l)) {
|
||
|
m->in.offset = strtol(l, &t, 0);
|
||
|
if (*s == '-')
|
||
|
m->in.offset = -m->in.offset;
|
||
|
} else
|
||
|
t = l;
|
||
|
if (*t++ != ')') {
|
||
|
kdError(7018) << "parse: missing ')' in indirect offset" << endl;
|
||
|
}
|
||
|
l = t;
|
||
|
}
|
||
|
while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
|
||
|
++l;
|
||
|
EATAB;
|
||
|
|
||
|
#define NBYTE 4
|
||
|
#define NSHORT 5
|
||
|
#define NLONG 4
|
||
|
#define NSTRING 6
|
||
|
#define NDATE 4
|
||
|
#define NBESHORT 7
|
||
|
#define NBELONG 6
|
||
|
#define NBEDATE 6
|
||
|
#define NLESHORT 7
|
||
|
#define NLELONG 6
|
||
|
#define NLEDATE 6
|
||
|
|
||
|
if (*l == 'u') {
|
||
|
++l;
|
||
|
m->flag |= UNSIGNED;
|
||
|
}
|
||
|
/* get type, skip it */
|
||
|
if (strncmp(l, "byte", NBYTE) == 0) {
|
||
|
m->type = BYTE;
|
||
|
l += NBYTE;
|
||
|
} else if (strncmp(l, "short", NSHORT) == 0) {
|
||
|
m->type = SHORT;
|
||
|
l += NSHORT;
|
||
|
} else if (strncmp(l, "long", NLONG) == 0) {
|
||
|
m->type = LONG;
|
||
|
l += NLONG;
|
||
|
} else if (strncmp(l, "string", NSTRING) == 0) {
|
||
|
m->type = STRING;
|
||
|
l += NSTRING;
|
||
|
} else if (strncmp(l, "date", NDATE) == 0) {
|
||
|
m->type = DATE;
|
||
|
l += NDATE;
|
||
|
} else if (strncmp(l, "beshort", NBESHORT) == 0) {
|
||
|
m->type = BESHORT;
|
||
|
l += NBESHORT;
|
||
|
} else if (strncmp(l, "belong", NBELONG) == 0) {
|
||
|
m->type = BELONG;
|
||
|
l += NBELONG;
|
||
|
} else if (strncmp(l, "bedate", NBEDATE) == 0) {
|
||
|
m->type = BEDATE;
|
||
|
l += NBEDATE;
|
||
|
} else if (strncmp(l, "leshort", NLESHORT) == 0) {
|
||
|
m->type = LESHORT;
|
||
|
l += NLESHORT;
|
||
|
} else if (strncmp(l, "lelong", NLELONG) == 0) {
|
||
|
m->type = LELONG;
|
||
|
l += NLELONG;
|
||
|
} else if (strncmp(l, "ledate", NLEDATE) == 0) {
|
||
|
m->type = LEDATE;
|
||
|
l += NLEDATE;
|
||
|
} else {
|
||
|
kdError(7018) << "parse: type " << l << " invalid" << endl;
|
||
|
return -1;
|
||
|
}
|
||
|
/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
|
||
|
if (*l == '&') {
|
||
|
++l;
|
||
|
m->mask = signextend(m, strtol(l, &l, 0));
|
||
|
} else
|
||
|
m->mask = (unsigned long) ~0L;
|
||
|
EATAB;
|
||
|
|
||
|
switch (*l) {
|
||
|
case '>':
|
||
|
case '<':
|
||
|
/* Old-style anding: "0 byte &0x80 dynamically linked" */
|
||
|
case '&':
|
||
|
case '^':
|
||
|
case '=':
|
||
|
m->reln = *l;
|
||
|
++l;
|
||
|
break;
|
||
|
case '!':
|
||
|
if (m->type != STRING) {
|
||
|
m->reln = *l;
|
||
|
++l;
|
||
|
break;
|
||
|
}
|
||
|
/* FALL THROUGH */
|
||
|
default:
|
||
|
if (*l == 'x' && isascii((unsigned char) l[1]) &&
|
||
|
isspace((unsigned char) l[1])) {
|
||
|
m->reln = *l;
|
||
|
++l;
|
||
|
goto GetDesc; /* Bill The Cat */
|
||
|
}
|
||
|
m->reln = '=';
|
||
|
break;
|
||
|
}
|
||
|
EATAB;
|
||
|
|
||
|
if (getvalue(m, &l))
|
||
|
return -1;
|
||
|
/*
|
||
|
* now get last part - the description
|
||
|
*/
|
||
|
GetDesc:
|
||
|
EATAB;
|
||
|
if (l[0] == '\b') {
|
||
|
++l;
|
||
|
m->nospflag = 1;
|
||
|
} else if ((l[0] == '\\') && (l[1] == 'b')) {
|
||
|
++l;
|
||
|
++l;
|
||
|
m->nospflag = 1;
|
||
|
} else
|
||
|
m->nospflag = 0;
|
||
|
// Copy description - until EOL or '#' (for comments)
|
||
|
while (*l != '\0' && *l != '#' && i < MAXDESC-1)
|
||
|
m->desc[i++] = *l++;
|
||
|
m->desc[i] = '\0';
|
||
|
// Remove trailing spaces
|
||
|
while (--i>0 && isspace( m->desc[i] ))
|
||
|
m->desc[i] = '\0';
|
||
|
|
||
|
// old code
|
||
|
//while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC) /* NULLBODY */ ;
|
||
|
|
||
|
#ifdef DEBUG_APPRENTICE
|
||
|
kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
|
||
|
#endif
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Read a numeric value from a pointer, into the value union of a magic
|
||
|
* pointer, according to the magic type. Update the string pointer to point
|
||
|
* just after the number read. Return 0 for success, non-zero for failure.
|
||
|
*/
|
||
|
static int
|
||
|
getvalue(struct magic *m, char **p)
|
||
|
{
|
||
|
int slen;
|
||
|
|
||
|
if (m->type == STRING) {
|
||
|
*p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
|
||
|
m->vallen = slen;
|
||
|
} else if (m->reln != 'x')
|
||
|
m->value.l = signextend(m, strtol(*p, p, 0));
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Convert a string containing C character escapes. Stop at an unescaped
|
||
|
* space or tab. Copy the converted version to "p", returning its length in
|
||
|
* *slen. Return updated scan pointer as function result.
|
||
|
*/
|
||
|
static char *
|
||
|
getstr(register char *s, register char *p, int plen, int *slen)
|
||
|
{
|
||
|
char *origs = s,
|
||
|
*origp = p;
|
||
|
char *pmax = p + plen - 1;
|
||
|
register int c;
|
||
|
register int val;
|
||
|
|
||
|
while ((c = *s++) != '\0') {
|
||
|
if (isspace((unsigned char) c))
|
||
|
break;
|
||
|
if (p >= pmax) {
|
||
|
kdError(7018) << "String too long: " << origs << endl;
|
||
|
break;
|
||
|
}
|
||
|
if (c == '\\') {
|
||
|
switch (c = *s++) {
|
||
|
|
||
|
case '\0':
|
||
|
goto out;
|
||
|
|
||
|
default:
|
||
|
*p++ = (char) c;
|
||
|
break;
|
||
|
|
||
|
case 'n':
|
||
|
*p++ = '\n';
|
||
|
break;
|
||
|
|
||
|
case 'r':
|
||
|
*p++ = '\r';
|
||
|
break;
|
||
|
|
||
|
case 'b':
|
||
|
*p++ = '\b';
|
||
|
break;
|
||
|
|
||
|
case 't':
|
||
|
*p++ = '\t';
|
||
|
break;
|
||
|
|
||
|
case 'f':
|
||
|
*p++ = '\f';
|
||
|
break;
|
||
|
|
||
|
case 'v':
|
||
|
*p++ = '\v';
|
||
|
break;
|
||
|
|
||
|
/* \ and up to 3 octal digits */
|
||
|
case '0':
|
||
|
case '1':
|
||
|
case '2':
|
||
|
case '3':
|
||
|
case '4':
|
||
|
case '5':
|
||
|
case '6':
|
||
|
case '7':
|
||
|
val = c - '0';
|
||
|
c = *s++; /* try for 2 */
|
||
|
if (c >= '0' && c <= '7') {
|
||
|
val = (val << 3) | (c - '0');
|
||
|
c = *s++; /* try for 3 */
|
||
|
if (c >= '0' && c <= '7')
|
||
|
val = (val << 3) | (c - '0');
|
||
|
else
|
||
|
--s;
|
||
|
} else
|
||
|
--s;
|
||
|
*p++ = (char) val;
|
||
|
break;
|
||
|
|
||
|
/* \x and up to 3 hex digits */
|
||
|
case 'x':
|
||
|
val = 'x'; /* Default if no digits */
|
||
|
c = hextoint(*s++); /* Get next char */
|
||
|
if (c >= 0) {
|
||
|
val = c;
|
||
|
c = hextoint(*s++);
|
||
|
if (c >= 0) {
|
||
|
val = (val << 4) + c;
|
||
|
c = hextoint(*s++);
|
||
|
if (c >= 0) {
|
||
|
val = (val << 4) + c;
|
||
|
} else
|
||
|
--s;
|
||
|
} else
|
||
|
--s;
|
||
|
} else
|
||
|
--s;
|
||
|
*p++ = (char) val;
|
||
|
break;
|
||
|
}
|
||
|
} else
|
||
|
*p++ = (char) c;
|
||
|
}
|
||
|
out:
|
||
|
*p = '\0';
|
||
|
*slen = p - origp;
|
||
|
//for ( char* foo = origp; foo < p ; ++foo )
|
||
|
// kdDebug(7018) << " " << *foo << endl;
|
||
|
return s;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
 * Value of a single hexadecimal digit character (0-15), or -1 when the
 * character is not a hex digit.
 */
static int
hextoint(int c)
{
    const unsigned char uc = (unsigned char) c;

    if (!isascii(uc)) {
        return -1;
    }
    if (isdigit(uc)) {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}

/*
|
||
|
* Convert the byte order of the data we are looking at
|
||
|
*/
|
||
|
static int
|
||
|
mconvert(union VALUETYPE *p, struct magic *m)
|
||
|
{
|
||
|
switch (m->type) {
|
||
|
case BYTE:
|
||
|
return 1;
|
||
|
case STRING:
|
||
|
/* Null terminate */
|
||
|
p->s[sizeof(p->s) - 1] = '\0';
|
||
|
return 1;
|
||
|
#ifndef WORDS_BIGENDIAN
|
||
|
case SHORT:
|
||
|
#endif
|
||
|
case BESHORT:
|
||
|
p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
|
||
|
return 1;
|
||
|
#ifndef WORDS_BIGENDIAN
|
||
|
case LONG:
|
||
|
case DATE:
|
||
|
#endif
|
||
|
case BELONG:
|
||
|
case BEDATE:
|
||
|
p->l = (long)
|
||
|
((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
|
||
|
return 1;
|
||
|
#ifdef WORDS_BIGENDIAN
|
||
|
case SHORT:
|
||
|
#endif
|
||
|
case LESHORT:
|
||
|
p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
|
||
|
return 1;
|
||
|
#ifdef WORDS_BIGENDIAN
|
||
|
case LONG:
|
||
|
case DATE:
|
||
|
#endif
|
||
|
case LELONG:
|
||
|
case LEDATE:
|
||
|
p->l = (long)
|
||
|
((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
|
||
|
return 1;
|
||
|
default:
|
||
|
kdError(7018) << "mconvert: invalid type " << m->type << endl;
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
|
||
|
int nbytes)
|
||
|
{
|
||
|
long offset = m->offset;
|
||
|
switch ( m->type )
|
||
|
{
|
||
|
case BYTE:
|
||
|
if ( offset + 1 > nbytes-1 ) // nbytes = (size of file) + 1
|
||
|
return 0;
|
||
|
break;
|
||
|
case SHORT:
|
||
|
case BESHORT:
|
||
|
case LESHORT:
|
||
|
if ( offset + 2 > nbytes-1 )
|
||
|
return 0;
|
||
|
break;
|
||
|
case LONG:
|
||
|
case BELONG:
|
||
|
case LELONG:
|
||
|
case DATE:
|
||
|
case BEDATE:
|
||
|
case LEDATE:
|
||
|
if ( offset + 4 > nbytes-1 )
|
||
|
return 0;
|
||
|
break;
|
||
|
case STRING:
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// The file length might be < sizeof(union VALUETYPE) (David)
|
||
|
// -> pad with zeros (the 'file' command does it this way)
|
||
|
// Thanks to Stan Covington <stan@calderasystems.com> for detailed report
|
||
|
if (offset + (int)sizeof(union VALUETYPE) > nbytes)
|
||
|
{
|
||
|
int have = nbytes - offset;
|
||
|
memset(p, 0, sizeof(union VALUETYPE));
|
||
|
if (have > 0)
|
||
|
memcpy(p, s + offset, have);
|
||
|
} else
|
||
|
memcpy(p, s + offset, sizeof(union VALUETYPE));
|
||
|
|
||
|
if (!mconvert(p, m))
|
||
|
return 0;
|
||
|
|
||
|
if (m->flag & INDIR) {
|
||
|
|
||
|
switch (m->in.type) {
|
||
|
case BYTE:
|
||
|
offset = p->b + m->in.offset;
|
||
|
break;
|
||
|
case SHORT:
|
||
|
offset = p->h + m->in.offset;
|
||
|
break;
|
||
|
case LONG:
|
||
|
offset = p->l + m->in.offset;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (offset + (int)sizeof(union VALUETYPE) > nbytes)
|
||
|
return 0;
|
||
|
|
||
|
memcpy(p, s + offset, sizeof(union VALUETYPE));
|
||
|
|
||
|
if (!mconvert(p, m))
|
||
|
return 0;
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
mcheck(union VALUETYPE *p, struct magic *m)
|
||
|
{
|
||
|
register unsigned long l = m->value.l;
|
||
|
register unsigned long v;
|
||
|
int matched;
|
||
|
|
||
|
if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
|
||
|
kdError(7018) << "BOINK" << endl;
|
||
|
return 1;
|
||
|
}
|
||
|
switch (m->type) {
|
||
|
case BYTE:
|
||
|
v = p->b;
|
||
|
break;
|
||
|
|
||
|
case SHORT:
|
||
|
case BESHORT:
|
||
|
case LESHORT:
|
||
|
v = p->h;
|
||
|
break;
|
||
|
|
||
|
case LONG:
|
||
|
case BELONG:
|
||
|
case LELONG:
|
||
|
case DATE:
|
||
|
case BEDATE:
|
||
|
case LEDATE:
|
||
|
v = p->l;
|
||
|
break;
|
||
|
|
||
|
case STRING:
|
||
|
l = 0;
|
||
|
/*
|
||
|
* What we want here is: v = strncmp(m->value.s, p->s,
|
||
|
* m->vallen); but ignoring any nulls. bcmp doesn't give
|
||
|
* -/+/0 and isn't universally available anyway.
|
||
|
*/
|
||
|
v = 0;
|
||
|
{
|
||
|
register unsigned char *a = (unsigned char *) m->value.s;
|
||
|
register unsigned char *b = (unsigned char *) p->s;
|
||
|
register int len = m->vallen;
|
||
|
Q_ASSERT(len);
|
||
|
|
||
|
while (--len >= 0)
|
||
|
if ((v = *b++ - *a++) != 0)
|
||
|
break;
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
kdError(7018) << "mcheck: invalid type " << m->type << endl;
|
||
|
return 0; /* NOTREACHED */
|
||
|
}
|
||
|
#if 0
|
||
|
qDebug("Before signextend %08x", v);
|
||
|
#endif
|
||
|
v = signextend(m, v) & m->mask;
|
||
|
#if 0
|
||
|
qDebug("After signextend %08x", v);
|
||
|
#endif
|
||
|
|
||
|
switch (m->reln) {
|
||
|
case 'x':
|
||
|
matched = 1;
|
||
|
break;
|
||
|
|
||
|
case '!':
|
||
|
matched = v != l;
|
||
|
break;
|
||
|
|
||
|
case '=':
|
||
|
matched = v == l;
|
||
|
break;
|
||
|
|
||
|
case '>':
|
||
|
if (m->flag & UNSIGNED)
|
||
|
matched = v > l;
|
||
|
else
|
||
|
matched = (long) v > (long) l;
|
||
|
break;
|
||
|
|
||
|
case '<':
|
||
|
if (m->flag & UNSIGNED)
|
||
|
matched = v < l;
|
||
|
else
|
||
|
matched = (long) v < (long) l;
|
||
|
break;
|
||
|
|
||
|
case '&':
|
||
|
matched = (v & l) == l;
|
||
|
break;
|
||
|
|
||
|
case '^':
|
||
|
matched = (v & l) != l;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
matched = 0;
|
||
|
kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
|
||
|
break; /* NOTREACHED */
|
||
|
}
|
||
|
|
||
|
return matched;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* magic_process - process input file fn. Opens the file and reads a
|
||
|
* fixed-size buffer to begin processing the contents.
|
||
|
*/
|
||
|
|
||
|
void process(struct config_rec* conf, const QString & fn)
|
||
|
{
|
||
|
int fd = 0;
|
||
|
unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */
|
||
|
KDE_struct_stat sb;
|
||
|
int nbytes = 0; /* number of bytes read from a datafile */
|
||
|
int tagbytes = 0; /* size of prefixed tag */
|
||
|
QCString fileName = QFile::encodeName( fn );
|
||
|
|
||
|
/*
|
||
|
* first try judging the file based on its filesystem status
|
||
|
*/
|
||
|
if (fsmagic(conf, fileName, &sb) != 0) {
|
||
|
//resultBuf += "\n";
|
||
|
return;
|
||
|
}
|
||
|
if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
|
||
|
/* We can't open it, but we were able to stat it. */
|
||
|
/*
|
||
|
* if (sb.st_mode & 0002) addResult("writable, ");
|
||
|
* if (sb.st_mode & 0111) addResult("executable, ");
|
||
|
*/
|
||
|
//kdDebug(7018) << "can't read `" << fn << "' (" << strerror(errno) << ")." << endl;
|
||
|
conf->resultBuf = MIME_BINARY_UNREADABLE;
|
||
|
return;
|
||
|
}
|
||
|
/*
|
||
|
* try looking at the first HOWMANY bytes
|
||
|
*/
|
||
|
if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
|
||
|
kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
|
||
|
conf->resultBuf = MIME_BINARY_UNREADABLE;
|
||
|
(void)close(fd);
|
||
|
return;
|
||
|
}
|
||
|
if ((tagbytes = tagmagic(buf, nbytes))) {
|
||
|
// Read buffer at new position
|
||
|
lseek(fd, tagbytes, SEEK_SET);
|
||
|
nbytes = read(fd, (char*)buf, HOWMANY);
|
||
|
if (nbytes < 0) {
|
||
|
conf->resultBuf = MIME_BINARY_UNREADABLE;
|
||
|
(void)close(fd);
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
if (nbytes == 0) {
|
||
|
conf->resultBuf = MIME_BINARY_ZEROSIZE;
|
||
|
} else {
|
||
|
buf[nbytes++] = '\0'; /* null-terminate it */
|
||
|
tryit(conf, buf, nbytes);
|
||
|
}
|
||
|
|
||
|
if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
|
||
|
{
|
||
|
/*
|
||
|
* Try to restore access, modification times if read it.
|
||
|
* This changes the "change" time (ctime), but we can't do anything
|
||
|
* about that.
|
||
|
*/
|
||
|
struct utimbuf utbuf;
|
||
|
utbuf.actime = sb.st_atime;
|
||
|
utbuf.modtime = sb.st_mtime;
|
||
|
(void) utime(fileName, &utbuf);
|
||
|
}
|
||
|
(void) close(fd);
|
||
|
}
|
||
|
|
||
|
|
||
|
static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
|
||
|
{
|
||
|
/* try tests in /etc/magic (or surrogate magic file) */
|
||
|
if (match(conf, buf, nb))
|
||
|
return;
|
||
|
|
||
|
/* try known keywords, check for ascii-ness too. */
|
||
|
if (ascmagic(conf, buf, nb) == 1)
|
||
|
return;
|
||
|
|
||
|
/* see if it's plain text */
|
||
|
if (textmagic(conf, buf, nb))
|
||
|
return;
|
||
|
|
||
|
/* abandon hope, all ye who remain here */
|
||
|
conf->resultBuf = MIME_BINARY_UNKNOWN;
|
||
|
conf->accuracy = 0;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
|
||
|
{
|
||
|
int ret = 0;
|
||
|
|
||
|
/*
|
||
|
* Fstat is cheaper but fails for files you don't have read perms on.
|
||
|
* On 4.2BSD and similar systems, use lstat() to identify symlinks.
|
||
|
*/
|
||
|
ret = KDE_lstat(fn, sb); /* don't merge into if; see "ret =" above */
|
||
|
|
||
|
if (ret) {
|
||
|
return 1;
|
||
|
|
||
|
}
|
||
|
/*
|
||
|
* if (sb->st_mode & S_ISUID) resultBuf += "setuid ";
|
||
|
* if (sb->st_mode & S_ISGID) resultBuf += "setgid ";
|
||
|
* if (sb->st_mode & S_ISVTX) resultBuf += "sticky ";
|
||
|
*/
|
||
|
|
||
|
switch (sb->st_mode & S_IFMT) {
|
||
|
case S_IFDIR:
|
||
|
conf->resultBuf = MIME_INODE_DIR;
|
||
|
return 1;
|
||
|
case S_IFCHR:
|
||
|
conf->resultBuf = MIME_INODE_CDEV;
|
||
|
return 1;
|
||
|
case S_IFBLK:
|
||
|
conf->resultBuf = MIME_INODE_BDEV;
|
||
|
return 1;
|
||
|
/* TODO add code to handle V7 MUX and Blit MUX files */
|
||
|
#ifdef S_IFIFO
|
||
|
case S_IFIFO:
|
||
|
conf->resultBuf = MIME_INODE_FIFO;
|
||
|
return 1;
|
||
|
#endif
|
||
|
#ifdef S_IFLNK
|
||
|
case S_IFLNK:
|
||
|
{
|
||
|
char buf[BUFSIZ + BUFSIZ + 4];
|
||
|
register int nch;
|
||
|
KDE_struct_stat tstatbuf;
|
||
|
|
||
|
if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
|
||
|
conf->resultBuf = MIME_INODE_LINK;
|
||
|
//conf->resultBuf += "\nunreadable";
|
||
|
return 1;
|
||
|
}
|
||
|
buf[nch] = '\0'; /* readlink(2) forgets this */
|
||
|
/* If broken symlink, say so and quit early. */
|
||
|
if (*buf == '/') {
|
||
|
if (KDE_stat(buf, &tstatbuf) < 0) {
|
||
|
conf->resultBuf = MIME_INODE_LINK;
|
||
|
//conf->resultBuf += "\nbroken";
|
||
|
return 1;
|
||
|
}
|
||
|
} else {
|
||
|
char *tmp;
|
||
|
char buf2[BUFSIZ + BUFSIZ + 4];
|
||
|
|
||
|
strncpy(buf2, fn, BUFSIZ);
|
||
|
buf2[BUFSIZ] = 0;
|
||
|
|
||
|
if ((tmp = strrchr(buf2, '/')) == NULL) {
|
||
|
tmp = buf; /* in current dir */
|
||
|
} else {
|
||
|
/* dir part plus (rel.) link */
|
||
|
*++tmp = '\0';
|
||
|
strcat(buf2, buf);
|
||
|
tmp = buf2;
|
||
|
}
|
||
|
if (KDE_stat(tmp, &tstatbuf) < 0) {
|
||
|
conf->resultBuf = MIME_INODE_LINK;
|
||
|
//conf->resultBuf += "\nbroken";
|
||
|
return 1;
|
||
|
} else
|
||
|
strcpy(buf, tmp);
|
||
|
}
|
||
|
if (conf->followLinks)
|
||
|
process( conf, QFile::decodeName( buf ) );
|
||
|
else
|
||
|
conf->resultBuf = MIME_INODE_LINK;
|
||
|
return 1;
|
||
|
}
|
||
|
return 1;
|
||
|
#endif
|
||
|
#ifdef S_IFSOCK
|
||
|
#ifndef __COHERENT__
|
||
|
case S_IFSOCK:
|
||
|
conf->resultBuf = MIME_INODE_SOCK;
|
||
|
return 1;
|
||
|
#endif
|
||
|
#endif
|
||
|
case S_IFREG:
|
||
|
break;
|
||
|
default:
|
||
|
kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
|
||
|
/* NOTREACHED */
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* regular file, check next possibility
|
||
|
*/
|
||
|
if (sb->st_size == 0) {
|
||
|
conf->resultBuf = MIME_BINARY_ZEROSIZE;
|
||
|
return 1;
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Go through the whole list, stopping if you find a match. Process all the
|
||
|
* continuations of that match before returning.
|
||
|
*
|
||
|
* We support multi-level continuations:
|
||
|
*
|
||
|
* At any time when processing a successful top-level match, there is a current
|
||
|
* continuation level; it represents the level of the last successfully
|
||
|
* matched continuation.
|
||
|
*
|
||
|
* Continuations above that level are skipped as, if we see one, it means that
|
||
|
* the continuation that controls them - i.e, the lower-level continuation
|
||
|
* preceding them - failed to match.
|
||
|
*
|
||
|
* Continuations below that level are processed as, if we see one, it means
|
||
|
* we've finished processing or skipping higher-level continuations under the
|
||
|
* control of a successful or unsuccessful lower-level continuation, and are
|
||
|
* now seeing the next lower-level continuation and should process it. The
|
||
|
* current continuation level reverts to the level of the one we're seeing.
|
||
|
*
|
||
|
* Continuations at the current level are processed as, if we see one, there's
|
||
|
* no lower-level continuation that may have failed.
|
||
|
*
|
||
|
* If a continuation matches, we bump the current continuation level so that
|
||
|
* higher-level continuations are processed.
|
||
|
*/
|
||
|
static int
|
||
|
match(struct config_rec* conf, unsigned char *s, int nbytes)
|
||
|
{
|
||
|
int cont_level = 0;
|
||
|
union VALUETYPE p;
|
||
|
struct magic *m;
|
||
|
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
|
||
|
for (m = conf->magic; m; m = m->next) {
|
||
|
if (isprint((((unsigned long) m) >> 24) & 255) &&
|
||
|
isprint((((unsigned long) m) >> 16) & 255) &&
|
||
|
isprint((((unsigned long) m) >> 8) & 255) &&
|
||
|
isprint(((unsigned long) m) & 255)) {
|
||
|
kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
for (m = conf->magic; m; m = m->next) {
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
|
||
|
#endif
|
||
|
memset(&p, 0, sizeof(union VALUETYPE));
|
||
|
|
||
|
/* check if main entry matches */
|
||
|
if (!mget(&p, s, m, nbytes) ||
|
||
|
!mcheck(&p, m)) {
|
||
|
struct magic *m_cont;
|
||
|
|
||
|
/*
|
||
|
* main entry didn't match, flush its continuations
|
||
|
*/
|
||
|
if (!m->next || (m->next->cont_level == 0)) {
|
||
|
continue;
|
||
|
}
|
||
|
m_cont = m->next;
|
||
|
while (m_cont && (m_cont->cont_level != 0)) {
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
|
||
|
#endif
|
||
|
/*
|
||
|
* this trick allows us to keep *m in sync
|
||
|
* when the continue advances the pointer
|
||
|
*/
|
||
|
m = m_cont;
|
||
|
m_cont = m_cont->next;
|
||
|
}
|
||
|
continue;
|
||
|
}
|
||
|
/* if we get here, the main entry rule was a match */
|
||
|
/* this will be the last run through the loop */
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
|
||
|
#endif
|
||
|
|
||
|
/* remember the match */
|
||
|
conf->resultBuf = m->desc;
|
||
|
|
||
|
cont_level++;
|
||
|
/*
|
||
|
* while (m && m->next && m->next->cont_level != 0 && ( m =
|
||
|
* m->next ))
|
||
|
*/
|
||
|
m = m->next;
|
||
|
while (m && (m->cont_level != 0)) {
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
|
||
|
#endif
|
||
|
if (cont_level >= m->cont_level) {
|
||
|
if (cont_level > m->cont_level) {
|
||
|
/*
|
||
|
* We're at the end of the level
|
||
|
* "cont_level" continuations.
|
||
|
*/
|
||
|
cont_level = m->cont_level;
|
||
|
}
|
||
|
if (mget(&p, s, m, nbytes) &&
|
||
|
mcheck(&p, m)) {
|
||
|
/*
|
||
|
* This continuation matched. Print
|
||
|
* its message, with a blank before
|
||
|
* it if the previous item printed
|
||
|
* and this item isn't empty.
|
||
|
*/
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "continuation matched" << endl;
|
||
|
#endif
|
||
|
conf->resultBuf = m->desc;
|
||
|
cont_level++;
|
||
|
}
|
||
|
}
|
||
|
/* move to next continuation record */
|
||
|
m = m->next;
|
||
|
}
|
||
|
// KDE-specific: need an actual mimetype for a real match
|
||
|
// If we only matched a rule with continuations but no mimetype, it's not a match
|
||
|
if ( !conf->resultBuf.isEmpty() )
|
||
|
{
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "match: matched" << endl;
|
||
|
#endif
|
||
|
return 1; /* all through */
|
||
|
}
|
||
|
}
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "match: failed" << endl;
|
||
|
#endif
|
||
|
return 0; /* no match at all */
|
||
|
}
|
||
|
|
||
|
// Try to parse prefixed tags before matching on content.
// So far only ID3v2 tags (versions up to 2.4) are handled.
//
// buf/nbytes: the sample read from the start of the file.
// Returns the total size in bytes of the tag (header, body and optional
// footer), i.e. the offset at which the real content starts, or 0 if the
// buffer does not start with a tag that is recognised.
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    // Sanity (known version, no unknown flags)
    if (buf[3] > 4)
        return 0;
    if (buf[4] == 0xFF)
        return 0;
    if (buf[5] & 0x0F)
        return 0;

    // The size is stored "syncsafe": four bytes of seven bits each, so a
    // set top bit means this is not really a tag header.
    for (int i = 6; i <= 9; ++i) {
        if (buf[i] & 0x80)
            return 0;
    }

    int size = 10;      // the header itself
    // Tag has v4 footer
    if (buf[5] & 0x10)
        size += 10;
    // Calculated syncsafe size
    size += buf[9];
    size += buf[8] << 7;
    size += buf[7] << 14;
    size += buf[6] << 21;
    return size;
}
|
||
|
|
||
|
/* A token is a view into the tokenized buffer; it is NOT null-terminated. */
struct Token {
    char *data;     /* first character of the token */
    int length;     /* number of characters, 0 when the input is exhausted */
};

/*
 * Splits a buffer into tokens separated by any of
 *   '\n' '\0' '\t' ' ' '\r' '\f' ',' ';' '>'
 * The buffer is only read, never modified, and must outlive the tokenizer.
 */
struct Tokenizer
{
    Tokenizer(char* buf, int nbytes)
        : token(), data(buf), length(nbytes), pos(0), newline(true)
    {
        token.data = buf;
    }

    /* True if the token returned last is the first one of the buffer or
     * was preceded by a line break. */
    bool isNewLine() {
        return newline;
    }

    /*
     * Returns the next token.  The returned pointer refers to storage
     * inside the tokenizer that is overwritten by the following call.
     * A token of length 0 signals the end of the buffer.
     */
    Token* nextToken() {
        newline = (pos == 0);
        token.data = data+pos;
        token.length = 0;
        while(pos<length) {
            switch (data[pos]) {
            case '\n':
                /* A line break in front of the token starts a new line;
                 * one behind it merely terminates the token and is seen
                 * again (as a leading separator) by the next call. */
                if (token.length == 0)
                    newline = true;
                /* fall through */
            case '\0':
            case '\t':
            case ' ':
            case '\r':
            case '\f':
            case ',':
            case ';':
            case '>':
                if (token.length != 0)
                    return &token;      /* separator ends the token */
                token.data++;           /* skip leading separator */
                break;
            default:
                token.length++;
            }
            pos++;
        }
        return &token;
    }

private:
    Token token;
    char* data;
    int length;
    int pos;
    bool newline;
};
|
||
|
|
||
|
|
||
|
/* an optimization over plain strcmp() */
|
||
|
//#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
|
||
|
static inline bool STREQ(const Token *token, const char *b) {
|
||
|
const char *a = token->data;
|
||
|
int len = token->length;
|
||
|
if (a == b) return true;
|
||
|
while(*a && *b && len > 0) {
|
||
|
if (*a != *b) return false;
|
||
|
a++; b++; len--;
|
||
|
}
|
||
|
return (len == 0 && *b == 0);
|
||
|
}
|
||
|
|
||
|
static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes)
|
||
|
{
|
||
|
int i;
|
||
|
double pct, maxpct, pctsum;
|
||
|
double pcts[NTYPES];
|
||
|
int mostaccurate, tokencount;
|
||
|
int typeset, jonly, conly, jconly, objconly, cpponly;
|
||
|
int has_escapes = 0;
|
||
|
//unsigned char *s;
|
||
|
//char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */
|
||
|
|
||
|
/* these are easy, do them first */
|
||
|
conf->accuracy = 70;
|
||
|
|
||
|
/*
|
||
|
* for troff, look for . + letter + letter or .\"; this must be done
|
||
|
* to disambiguate tar archives' ./file and other trash from real
|
||
|
* troff input.
|
||
|
*/
|
||
|
if (*buf == '.') {
|
||
|
unsigned char *tp = buf + 1;
|
||
|
|
||
|
while (isascii(*tp) && isspace(*tp))
|
||
|
++tp; /* skip leading whitespace */
|
||
|
if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
|
||
|
isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
|
||
|
conf->resultBuf = MIME_APPL_TROFF;
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
if ((*buf == 'c' || *buf == 'C') &&
|
||
|
isascii(*(buf + 1)) && isspace(*(buf + 1))) {
|
||
|
/* Fortran */
|
||
|
conf->resultBuf = MIME_TEXT_FORTRAN;
|
||
|
return 1;
|
||
|
}
|
||
|
assert(nbytes-1 < HOWMANY + 1);
|
||
|
/* look for tokens - this is expensive! */
|
||
|
has_escapes = (memchr(buf, '\033', nbytes) != NULL);
|
||
|
Tokenizer tokenizer((char*)buf, nbytes);
|
||
|
const Token* token;
|
||
|
bool linecomment = false, blockcomment = false;
|
||
|
const struct names *p;
|
||
|
int typecount[NTYPES];
|
||
|
/*
|
||
|
* Fritz:
|
||
|
* Try a little harder on C/C++/Java.
|
||
|
*/
|
||
|
memset(&typecount, 0, sizeof(typecount));
|
||
|
typeset = 0;
|
||
|
jonly = 0;
|
||
|
conly = 0;
|
||
|
jconly = 0;
|
||
|
objconly = 0;
|
||
|
cpponly = 0;
|
||
|
tokencount = 0;
|
||
|
bool foundClass = false; // mandatory for java
|
||
|
// first collect all possible types and count matches
|
||
|
// we stop at '>' too, because of "<title>blah</title>" on HTML pages
|
||
|
while ((token = tokenizer.nextToken())->length > 0) {
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
|
||
|
#endif
|
||
|
if (linecomment && tokenizer.isNewLine())
|
||
|
linecomment = false;
|
||
|
if (blockcomment && STREQ(token, "*/")) {
|
||
|
blockcomment = false;
|
||
|
continue;
|
||
|
}
|
||
|
for (p = names; p->name ; p++) {
|
||
|
if (STREQ(token, p->name)) {
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
|
||
|
#endif
|
||
|
tokencount++;
|
||
|
typeset |= p->type;
|
||
|
if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) {
|
||
|
if (linecomment || blockcomment) {
|
||
|
continue;
|
||
|
}
|
||
|
else {
|
||
|
switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC))
|
||
|
{
|
||
|
case L_JAVA:
|
||
|
jonly++;
|
||
|
break;
|
||
|
case L_OBJC:
|
||
|
objconly++;
|
||
|
break;
|
||
|
case L_CPP:
|
||
|
cpponly++;
|
||
|
break;
|
||
|
case (L_CPP|L_JAVA):
|
||
|
jconly++;
|
||
|
if ( !foundClass && STREQ(token, "class") )
|
||
|
foundClass = true;
|
||
|
break;
|
||
|
case (L_C|L_CPP):
|
||
|
conly++;
|
||
|
break;
|
||
|
default:
|
||
|
if (STREQ(token, "//")) linecomment = true;
|
||
|
if (STREQ(token, "/*")) blockcomment = true;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
for (i = 0; i < (int)NTYPES; i++) {
|
||
|
if ((1 << i) & p->type) typecount[i]+= p->type & FLAG_STRONG ? 2 : 1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) {
|
||
|
conf->accuracy = 60;
|
||
|
if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) {
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl;
|
||
|
#endif
|
||
|
if (jonly > 1 && foundClass) {
|
||
|
// At least two java-only tokens have matched, including "class"
|
||
|
conf->resultBuf = QString(types[P_JAVA].type);
|
||
|
return 1;
|
||
|
}
|
||
|
if (jconly > 1) {
|
||
|
// At least two non-C (only C++ or Java) token have matched.
|
||
|
if (typecount[P_JAVA] < typecount[P_CPP])
|
||
|
conf->resultBuf = QString(types[P_CPP].type);
|
||
|
else
|
||
|
conf->resultBuf = QString(types[P_JAVA].type);
|
||
|
return 1;
|
||
|
}
|
||
|
if (conly + cpponly > 1) {
|
||
|
// Either C or C++.
|
||
|
if (cpponly > 0)
|
||
|
conf->resultBuf = QString(types[P_CPP].type);
|
||
|
else
|
||
|
conf->resultBuf = QString(types[P_C].type);
|
||
|
return 1;
|
||
|
}
|
||
|
if (objconly > 0) {
|
||
|
conf->resultBuf = QString(types[P_OBJC].type);
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Neither C, C++ or Java (or all of them without able to distinguish):
|
||
|
* Simply take the token-class with the highest
|
||
|
* matchcount > 0
|
||
|
*/
|
||
|
mostaccurate = -1;
|
||
|
maxpct = pctsum = 0.0;
|
||
|
for (i = 0; i < (int)NTYPES; i++) {
|
||
|
if (typecount[i] > 1) { // one word is not enough, we need at least two
|
||
|
pct = (double)typecount[i] / (double)types[i].kwords *
|
||
|
(double)types[i].weight;
|
||
|
pcts[i] = pct;
|
||
|
pctsum += pct;
|
||
|
if (pct > maxpct) {
|
||
|
maxpct = pct;
|
||
|
mostaccurate = i;
|
||
|
}
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
|
||
|
#endif
|
||
|
}
|
||
|
}
|
||
|
if (mostaccurate >= 0) {
|
||
|
if ( mostaccurate != P_JAVA || foundClass ) // 'class' mandatory for java
|
||
|
{
|
||
|
conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl;
|
||
|
#endif
|
||
|
conf->resultBuf = QString(types[mostaccurate].type);
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
switch (is_tar(buf, nbytes)) {
|
||
|
case 1:
|
||
|
/* V7 tar archive */
|
||
|
conf->resultBuf = MIME_APPL_TAR;
|
||
|
conf->accuracy = 90;
|
||
|
return 1;
|
||
|
case 2:
|
||
|
/* POSIX tar archive */
|
||
|
conf->resultBuf = MIME_APPL_TAR;
|
||
|
conf->accuracy = 90;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
for (i = 0; i < nbytes; i++) {
|
||
|
if (!isascii(*(buf + i)))
|
||
|
return 0; /* not all ascii */
|
||
|
}
|
||
|
|
||
|
/* all else fails, but it is ascii... */
|
||
|
conf->accuracy = 90;
|
||
|
if (has_escapes) {
|
||
|
/* text with escape sequences */
|
||
|
/* we leave this open for further differentiation later */
|
||
|
conf->resultBuf = MIME_TEXT_UNKNOWN;
|
||
|
} else {
|
||
|
/* plain text */
|
||
|
conf->resultBuf = MIME_TEXT_PLAIN;
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
/* Maximal length of a line we consider "reasonable". */
|
||
|
#define TEXT_MAXLINELEN 300
|
||
|
|
||
|
// This code is taken from the "file" command, where it is licensed
|
||
|
// in the "beer-ware license" :-)
|
||
|
// Original author: <joerg@FreeBSD.ORG>
|
||
|
// Simplified by David Faure to avoid the static array char[256].
|
||
|
static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes)
|
||
|
{
|
||
|
int i;
|
||
|
unsigned char *cp;
|
||
|
|
||
|
nbytes--;
|
||
|
|
||
|
/* First, look whether there are "unreasonable" characters. */
|
||
|
for (i = 0, cp = buf; i < nbytes; i++, cp++)
|
||
|
if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
|
||
|
return 0;
|
||
|
|
||
|
/* Now, look whether the file consists of lines of
|
||
|
* "reasonable" length. */
|
||
|
|
||
|
for (i = 0; i < nbytes;) {
|
||
|
cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
|
||
|
if (cp == NULL) {
|
||
|
/* Don't fail if we hit the end of buffer. */
|
||
|
if (i + TEXT_MAXLINELEN >= nbytes)
|
||
|
break;
|
||
|
else
|
||
|
return 0;
|
||
|
}
|
||
|
if (cp - buf > TEXT_MAXLINELEN)
|
||
|
return 0;
|
||
|
i += (cp - buf + 1);
|
||
|
buf = cp + 1;
|
||
|
}
|
||
|
conf->resultBuf = MIME_TEXT_PLAIN;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* is_tar() -- figure out whether file is a tar archive.
|
||
|
*
|
||
|
* Stolen (by author of file utility) from the public domain tar program: Public
|
||
|
* Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
|
||
|
*
|
||
|
* @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7
|
||
|
* 1997/06/24 00:41:02 ikluft Exp ikluft $
|
||
|
*
|
||
|
* Comments changed and some code/comments reformatted for file command by Ian
|
||
|
* Darwin.
|
||
|
*/
|
||
|
|
||
|
#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
|
||
|
|
||
|
/*
|
||
|
* Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for
|
||
|
* old UNIX tar file, 2 for Unix Std (POSIX) tar file.
|
||
|
*/
|
||
|
|
||
|
static int
|
||
|
is_tar(unsigned char *buf, int nbytes)
|
||
|
{
|
||
|
register union record *header = (union record *) buf;
|
||
|
register int i;
|
||
|
register long sum,
|
||
|
recsum;
|
||
|
register char *p;
|
||
|
|
||
|
if (nbytes < (int)sizeof(union record))
|
||
|
return 0;
|
||
|
|
||
|
recsum = from_oct(8, header->header.chksum);
|
||
|
|
||
|
sum = 0;
|
||
|
p = header->charptr;
|
||
|
for (i = sizeof(union record); --i >= 0;) {
|
||
|
/*
|
||
|
* We can't use unsigned char here because of old compilers,
|
||
|
* e.g. V7.
|
||
|
*/
|
||
|
sum += 0xFF & *p++;
|
||
|
}
|
||
|
|
||
|
/* Adjust checksum to count the "chksum" field as blanks. */
|
||
|
for (i = sizeof(header->header.chksum); --i >= 0;)
|
||
|
sum -= 0xFF & header->header.chksum[i];
|
||
|
sum += ' ' * sizeof header->header.chksum;
|
||
|
|
||
|
if (sum != recsum)
|
||
|
return 0; /* Not a tar archive */
|
||
|
|
||
|
if (0 == strcmp(header->header.magic, TMAGIC))
|
||
|
return 2; /* Unix Standard tar archive */
|
||
|
|
||
|
return 1; /* Old fashioned tar archive */
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
 * Quick and dirty octal conversion.
 *
 * digs   width of the field in characters
 * where  start of the field; leading white space is skipped
 *
 * Conversion stops at the first non-octal character or when the field is
 * used up.  Result is -1 if the field is invalid (all blank, or the digits
 * are followed by something other than white space or a null).
 */
static long
from_oct(int digs, char *where)
{
    /* <ctype.h> functions need an unsigned char value: passing a negative
     * plain char (a byte >= 0x80) is undefined. */
    while (isspace((unsigned char) *where)) { /* Skip spaces */
        where++;
        if (--digs <= 0)
            return -1; /* All blank field */
    }

    long value = 0;
    while (digs > 0 && *where >= '0' && *where <= '7') { /* Scan til nonoctal */
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1; /* Ended on non-space/nul */

    return value;
}
|
||
|
|
||
|
/*
 * Default constructor: loads the main magic file found through the "mime"
 * resource, then merges every *.magic snippet found under the "config"
 * resource's magic/ directory.  Snippets that fail to parse are reported
 * with a warning and otherwise ignored.
 */
KMimeMagic::KMimeMagic()
{
    // Magic file detection init
    init( locate( "mime", "magic" ) );

    // Add snippets from share/config/magic/*
    QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
    QStringList::Iterator it = snippets.begin();
    while ( it != snippets.end() ) {
        if ( !mergeConfig( *it ) )
            kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
        ++it;
    }
}
|
||
|
|
||
|
/*
 * Constructs an instance that uses only the given magic file; no
 * additional snippets are merged.
 */
KMimeMagic::KMimeMagic(const QString & _configfile)
{
    init( _configfile );
}
|
||
|
|
||
|
void KMimeMagic::init( const QString& _configfile )
|
||
|
{
|
||
|
int result;
|
||
|
conf = new config_rec;
|
||
|
|
||
|
/* set up the magic list (empty) */
|
||
|
conf->magic = conf->last = NULL;
|
||
|
magicResult = NULL;
|
||
|
conf->followLinks = false;
|
||
|
|
||
|
conf->utimeConf = 0L; // created on demand
|
||
|
/* on the first time through we read the magic file */
|
||
|
result = apprentice(_configfile);
|
||
|
if (result == -1)
|
||
|
return;
|
||
|
#ifdef MIME_MAGIC_DEBUG_TABLE
|
||
|
test_table();
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* The destructor.
|
||
|
* Free the magic-table and other resources.
|
||
|
*/
|
||
|
KMimeMagic::~KMimeMagic()
|
||
|
{
|
||
|
if (conf) {
|
||
|
struct magic *p = conf->magic;
|
||
|
struct magic *q;
|
||
|
while (p) {
|
||
|
q = p;
|
||
|
p = p->next;
|
||
|
free(q);
|
||
|
}
|
||
|
delete conf->utimeConf;
|
||
|
delete conf;
|
||
|
}
|
||
|
delete magicResult;
|
||
|
}
|
||
|
|
||
|
bool
|
||
|
KMimeMagic::mergeConfig(const QString & _configfile)
|
||
|
{
|
||
|
kdDebug(7018) << k_funcinfo << _configfile << endl;
|
||
|
int result;
|
||
|
|
||
|
if (_configfile.isEmpty())
|
||
|
return false;
|
||
|
result = apprentice(_configfile);
|
||
|
if (result == -1) {
|
||
|
return false;
|
||
|
}
|
||
|
#ifdef MIME_MAGIC_DEBUG_TABLE
|
||
|
test_table();
|
||
|
#endif
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
bool
|
||
|
KMimeMagic::mergeBufConfig(char * _configbuf)
|
||
|
{
|
||
|
int result;
|
||
|
|
||
|
if (conf) {
|
||
|
result = buff_apprentice(_configbuf);
|
||
|
if (result == -1)
|
||
|
return false;
|
||
|
#ifdef MIME_MAGIC_DEBUG_TABLE
|
||
|
test_table();
|
||
|
#endif
|
||
|
return true;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
KMimeMagic::setFollowLinks( bool _enable )
|
||
|
{
|
||
|
conf->followLinks = _enable;
|
||
|
}
|
||
|
|
||
|
/*
 * Determines the type of the data held in `array'.  At most the first
 * HOWMANY bytes are examined.
 *
 * Returns the instance's result object, which is reused (and overwritten)
 * by every find*Type() call.
 */
KMimeMagicResult *
KMimeMagic::findBufferType(const QByteArray &array)
{
    unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */

    /* start from a clean slate */
    conf->resultBuf = QString::null;
    if ( !magicResult )
        magicResult = new KMimeMagicResult();
    magicResult->setInvalid();
    conf->accuracy = 100;

    /* take a private, bounded copy of the data */
    const int available = array.size();
    int nbytes = (available > HOWMANY) ? HOWMANY : available;
    memcpy(buf, array.data(), nbytes);

    if (nbytes == 0) {
        conf->resultBuf = MIME_BINARY_ZEROSIZE;
    } else {
        buf[nbytes++] = '\0'; /* null-terminate it */
        tryit(conf, buf, nbytes);
    }

    /* if we have any results, put them in the request structure */
    magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
    magicResult->setAccuracy(conf->accuracy);
    return magicResult;
}
|
||
|
|
||
|
static void
|
||
|
refineResult(KMimeMagicResult *r, const QString & _filename)
|
||
|
{
|
||
|
QString tmp = r->mimeType();
|
||
|
if (tmp.isEmpty())
|
||
|
return;
|
||
|
if ( tmp == "text/x-c" || tmp == "text/x-objc" )
|
||
|
{
|
||
|
if ( _filename.right(2) == ".h" )
|
||
|
tmp += "hdr";
|
||
|
else
|
||
|
tmp += "src";
|
||
|
r->setMimeType(tmp);
|
||
|
}
|
||
|
else
|
||
|
if ( tmp == "text/x-c++" )
|
||
|
{
|
||
|
if ( _filename.endsWith(".h")
|
||
|
|| _filename.endsWith(".hh")
|
||
|
|| _filename.endsWith(".H")
|
||
|
|| !_filename.right(4).contains('.'))
|
||
|
tmp += "hdr";
|
||
|
else
|
||
|
tmp += "src";
|
||
|
r->setMimeType(tmp);
|
||
|
}
|
||
|
else
|
||
|
if ( tmp == "application/x-sharedlib" )
|
||
|
{
|
||
|
if ( _filename.find( ".so" ) == -1 )
|
||
|
{
|
||
|
tmp = "application/x-executable";
|
||
|
r->setMimeType( tmp );
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
 * Determines the type of the data in `data' like findBufferType() and then
 * refines the result using the file name `fn'.
 */
KMimeMagicResult *
KMimeMagic::findBufferFileType( const QByteArray &data,
                                const QString &fn)
{
    KMimeMagicResult * const result = findBufferType( data );
    refineResult(result, fn);
    return result;
}
|
||
|
|
||
|
/*
|
||
|
* Find the content-type of the given file.
|
||
|
*/
|
||
|
KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
|
||
|
{
|
||
|
#ifdef DEBUG_MIMEMAGIC
|
||
|
kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
|
||
|
#endif
|
||
|
conf->resultBuf = QString::null;
|
||
|
|
||
|
if ( !magicResult )
|
||
|
magicResult = new KMimeMagicResult();
|
||
|
magicResult->setInvalid();
|
||
|
conf->accuracy = 100;
|
||
|
|
||
|
if ( !conf->utimeConf )
|
||
|
conf->utimeConf = new KMimeMagicUtimeConf();
|
||
|
|
||
|
/* process it based on the file contents */
|
||
|
process(conf, fn );
|
||
|
|
||
|
/* if we have any results, put them in the request structure */
|
||
|
//finishResult();
|
||
|
magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
|
||
|
magicResult->setAccuracy(conf->accuracy);
|
||
|
refineResult(magicResult, fn);
|
||
|
return magicResult;
|
||
|
}
|