kjs: use libpcre2 instead of libpcre

Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
3 months ago · 7740e825a6
parent b59d51c679
commit 7740e825a6
8 changed files with 163 additions and 178 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -111,7 +111,7 @@ option( WITH_OPENEXR "Enable openexr support" ${WITH_ALL_OPTIONS} )
 option( WITH_UTEMPTER "Use utempter for utmp management" ${WITH_ALL_OPTIONS} )
 option( WITH_AVAHI "Enable AVAHI support" ${WITH_ALL_OPTIONS} )
 option( WITH_ELFICON "Enable ELF embedded icon support" ${WITH_ALL_OPTIONS} )
-option( WITH_PCRE "Enable pcre regex support for kjs" ON )
+option( WITH_PCRE2 "Enable pcre2 regex support for kjs" ON )
 option( WITH_GCC_VISIBILITY "Enable fvisibility and fvisibility-inlines-hidden" ${WITH_ALL_OPTIONS} )
 option( WITH_INOTIFY "Enable inotify support for tdeio" ON )
 option( WITH_GAMIN "Enable FAM/GAMIN support" ${WITH_ALL_OPTIONS} )
@ -1223,14 +1223,14 @@ if( WITH_ELFICON )
 endif( )
-##### check for pcre ###########################
+##### check for pcre2 ###########################
-if( WITH_PCRE )
+if( WITH_PCRE2 )
-  pkg_search_module( LIBPCRE libpcre )
+  pkg_check_modules( LIBPCRE2 libpcre2-8 libpcre2-posix )
-  if( NOT LIBPCRE_FOUND )
+  if( NOT LIBPCRE2_FOUND )
-      message(FATAL_ERROR "\npcre support are requested, but not found on your system" )
+    tde_message_fatal( "pcre2 support was requested, but not found on your system" )
-  endif( NOT LIBPCRE_FOUND )
+  endif( )
-  set( HAVE_PCREPOSIX 1 )
+  set( HAVE_PCRE2POSIX 1 )
 endif( )
--- a/config.h.cmake
+++ b/config.h.cmake
@ -417,8 +417,8 @@
 /* Define to 1 if you have the <paths.h> header file. */
 #cmakedefine HAVE_PATHS_H 1
-/* Define if you have pcreposix libraries and header files. */
+/* Define if you have pcre2 libraries and header files. */
-#cmakedefine HAVE_PCREPOSIX 1
+#cmakedefine HAVE_PCRE2POSIX 1
 /* Define to 1 if you have the `poll' function. */
 #cmakedefine HAVE_POLL 1
--- a/configure.in.bot
+++ b/configure.in.bot
@ -23,12 +23,12 @@ if test -z "$LIBART_CONFIG"; then
  all_tests=bad
 fi
-if test -z "$LIBPCRE"; then
+if test -z "$LIBPCRE2"; then
  echo ""
-  echo "You're missing libpcre."
+  echo "You're missing libpcre2."
  echo "Some web pages (using regular expressions in Javascript code) will not "
-  echo "work correctly, the regexp support being limited without libpcre. "
+  echo "work correctly, the regexp support being limited without libpcre2. "
-  echo "If you plan to use Konqueror as a Web Browser, download libpcre from "
+  echo "If you plan to use Konqueror as a Web Browser, download libpcre2 from "
  echo "http://www.pcre.org or find a binary package for your platform."
  echo ""
  all_tests=bad
--- a/kjs/CMakeLists.txt
+++ b/kjs/CMakeLists.txt
@ -15,12 +15,12 @@ include_directories(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_BINARY_DIR}
  ${CMAKE_SOURCE_DIR}/tdecore
-  ${LIBPCRE_INCLUDEDIR}
+  ${LIBPCRE2_INCLUDE_DIRS}
 )
 link_directories(
  ${TDECORE_LIBRARY_DIRS}
-  ${LIBPCRE_LIBDIR}
+  ${LIBPCRE2_LIBRARY_DIRS}
 )
@ -62,6 +62,6 @@ tde_add_library( ${target} SHARED
  SOURCES ${${target}_SRCS}
  VERSION 1.2.0
  LINK tdecore-shared
-  LINK_PRIVATE ${LIBPCRE_LIBRARIES}
+  LINK_PRIVATE ${LIBPCRE2_LIBRARIES}
  DESTINATION ${LIB_INSTALL_DIR}
 )
--- a/kjs/Makefile.am
+++ b/kjs/Makefile.am
@ -17,7 +17,7 @@
 #    Boston, MA 02110-1301, USA.
 YACC = bison
-INCLUDES = $(PCRECFLAGS) $(all_includes)
+INCLUDES = $(PCRE2CFLAGS) $(all_includes)
 lib_LTLIBRARIES = libkjs.la
@ -50,7 +50,7 @@ endif
 libkjs_la_LDFLAGS = -version-info 3:0:2 -no-undefined $(VSCRIPT) \
          $(USER_LDFLAGS) $(all_libraries)
-libkjs_la_LIBADD = -lm $(LIBPCRE)
+libkjs_la_LIBADD = -lm $(LIBPCRE2)
 EXTRA_DIST = grammar.y
@ -93,7 +93,7 @@ CLEANFILES = $(LUT_FILES)
 ## test program (in one program for easier profiling/memory debugging)
 EXTRA_PROGRAMS = testkjs_static
 testkjs_static_SOURCES = testkjs.cpp 
-testkjs_static_LDADD = $(LIBPCRE) libkjs.la
+testkjs_static_LDADD = $(LIBPCRE2) libkjs.la
 testkjs_static_LDFLAGS = -static
 ## test program (linked to libkjs)
--- a/kjs/configure.in.in
+++ b/kjs/configure.in.in
@ -2,52 +2,55 @@ dnl KDE JavaScript specific configure tests
 AC_CHECK_HEADERS(ieeefp.h float.h)
-AC_DEFUN([AC_CHECK_PCREPOSIX],
+dnl AC_CHECK_PCRE2POSIX: locate libpcre2 (8-bit), set LIBPCRE2 / PCRE2CFLAGS
+dnl and define HAVE_PCRE2POSIX when the link test succeeds.
+AC_DEFUN([AC_CHECK_PCRE2POSIX],
 [
-  dnl define the configure option that disables pcre
+  dnl define the configure option that disables pcre2
-  AC_ARG_ENABLE(pcre,AC_HELP_STRING([--disable-pcre],[don't require libpcre (poor RegExp support in Javascript)]),
+  AC_ARG_ENABLE(pcre2,AC_HELP_STRING([--disable-pcre2],[don't require libpcre2 (poor RegExp support in Javascript)]),
-      with_pcre=$enableval, with_pcre=yes)
+      with_pcre2=$enableval, with_pcre2=yes)
- if test "$with_pcre" = "yes"; then
+ if test "$with_pcre2" = "yes"; then
-    KDE_FIND_PATH(pcre-config, PCRE_CONFIG, [${exec_prefix}/bin ${prefix}/bin], [PCRE_CONFIG="" ])
+    KDE_FIND_PATH(pcre2-config, PCRE2_CONFIG, [${exec_prefix}/bin ${prefix}/bin], [PCRE2_CONFIG="" ])
-    if test -n "$PCRE_CONFIG" && $PCRE_CONFIG --libs >/dev/null 2>&1; then
+    if test -n "$PCRE2_CONFIG" && $PCRE2_CONFIG --libs8 >/dev/null 2>&1; then
-        LIBPCRE=`$PCRE_CONFIG --libs-posix | sed -e "s,-L/usr/lib ,," -e "s,[\b-].\+pcreposix[^[:space:]]*\b,,"`
+        LIBPCRE2=`$PCRE2_CONFIG --libs-posix | sed -e "s,-L/usr/lib ,," -e "s,[\b-].\+pcreposix[^[:space:]]*\b,,"`
-        PCRECFLAGS=`$PCRE_CONFIG --cflags`
+        PCRE2CFLAGS=`$PCRE2_CONFIG --cflags`
    else
-        LIBPCRE="-lpcre"
+        LIBPCRE2="-lpcre2-8"
-        PCRECFLAGS=
+        PCRE2CFLAGS=
    fi
-    AC_CACHE_VAL(ac_cv_have_pcreposix, [
+    AC_CACHE_VAL(ac_cv_have_pcre2posix, [
      ac_save_libs="$LIBS"
-      LIBS="$LIBPCRE"
+      LIBS="$LIBPCRE2"
      ac_CPPFLAGS_save="$CPPFLAGS"
-      CPPFLAGS="$CPPFLAGS $PCRECFLAGS $all_includes"
+      CPPFLAGS="$CPPFLAGS $PCRE2CFLAGS $all_includes"
      ac_LDFLAGS_save="$LDFLAGS"
      LDFLAGS="$LDFLAGS $all_libraries"
      AC_TRY_LINK(
-         [#include <pcre.h>],
+         [
-         [regfree(0);],
+          #define PCRE2_CODE_UNIT_WIDTH 8
-         [ac_cv_have_pcreposix="yes"],
+          #include <pcre2.h>
-         [ac_cv_have_pcreposix="no"]
+         ],
         [pcre2_regfree(0);],
         [ac_cv_have_pcre2posix="yes"],
         [ac_cv_have_pcre2posix="no"]
      )
      LIBS="$ac_save_libs"
      LDFLAGS="$ac_LDFLAGS_save"
      CPPFLAGS="$ac_CPPFLAGS_save"
    ])
-    if test "$ac_cv_have_pcreposix" = "yes"; then
+    if test "$ac_cv_have_pcre2posix" = "yes"; then
-        AC_DEFINE(HAVE_PCREPOSIX, 1, [Define if you have pcreposix libraries and header files.])
+        AC_DEFINE(HAVE_PCRE2POSIX, 1, [Define if you have pcre2posix libraries and header files.])
    else
        AC_MSG_ERROR([You're missing libpcre.
-Download libpcre from http://www.pcre.org or find a binary package for your platform.
+Download libpcre2 from http://www.pcre.org or find a binary package for your platform.
 Alternatively, you can specify --disable-pcre, but some web pages - using regular
 expressions in Javascript code - will not work correctly, the regexp support being
 quite limited if libpcre isn't present.])
    fi
  fi
 ])
-AC_CHECK_PCREPOSIX
+AC_CHECK_PCRE2POSIX
-AC_SUBST(LIBPCRE)
+AC_SUBST(LIBPCRE2)
-AC_SUBST(PCRECFLAGS)
+AC_SUBST(PCRE2CFLAGS)
 AM_CONFIG_HEADER([kjs/global.h])
--- a/kjs/regexp.cpp
+++ b/kjs/regexp.cpp
@ -30,21 +30,17 @@
 using namespace KJS;
 #ifdef PCRE_CONFIG_UTF8
 RegExp::UTF8SupportState RegExp::utf8Support = RegExp::Unknown;
 #endif
 RegExp::RegExp(const UString &p, int f)
  : pat(p), flgs(f), m_notEmpty(false), valid(true), buffer(0), originalPos(0)
 {
  // Determine whether libpcre has unicode support if need be..
 #ifdef PCRE_CONFIG_UTF8
  if (utf8Support == Unknown) {
-    int supported;
+    // Ask PCRE2 whether it was built with Unicode (UTF) support. The
+    // compiled-widths query only reports that the 8-bit library exists.
+    uint32_t supported = 0;
-    pcre_config(PCRE_CONFIG_UTF8, (void*)&supported);
+    pcre2_config(PCRE2_CONFIG_UNICODE, &supported);
-    utf8Support = supported ? Supported : Unsupported;
+    utf8Support = (supported != 0) ? Supported : Unsupported;
  }
 #endif
  nrSubPatterns = 0; // determined in match() with POSIX regex.
@ -126,45 +122,46 @@ RegExp::RegExp(const UString &p, int f)
    intern = p;
  }
-#ifdef HAVE_PCREPOSIX
+#ifdef HAVE_PCRE2POSIX
-  int pcreflags = 0;
+  uint32_t pcre2flags = 0;
-  const char *perrormsg;
+  int errorCode;
-  int errorOffset;
+  PCRE2_SIZE errorOffset;
  if (flgs & IgnoreCase)
-    pcreflags |= PCRE_CASELESS;
+    pcre2flags |= PCRE2_CASELESS;
  if (flgs & Multiline)
-    pcreflags |= PCRE_MULTILINE;
+    pcre2flags |= PCRE2_MULTILINE;
 #ifdef PCRE_CONFIG_UTF8
  if (utf8Support == Supported)
-    pcreflags |= (PCRE_UTF8 | PCRE_NO_UTF8_CHECK);
+    pcre2flags |= (PCRE2_UTF | PCRE2_NO_UTF_CHECK);
 #endif
  // Fill our buffer with an encoded version, whether utf-8, or, 
  // if PCRE is incapable, truncated.
  prepareMatch(intern);
-  pcregex = pcre_compile(buffer, pcreflags,
+  pcregex = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, pcre2flags,
-			 &perrormsg, &errorOffset, NULL);
+       &errorCode, &errorOffset, NULL);
  doneMatch(); // Cleanup buffers
  if (!pcregex) {
 #ifndef NDEBUG
-    fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", perrormsg);
+    PCRE2_UCHAR errorMsg[256];
    pcre2_get_error_message(errorCode, errorMsg, sizeof(errorMsg));
    fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", errorMsg);
 #endif
    valid = false;
+    // No match data exists for an invalid pattern; clear the pointer so the
+    // null checks in ~RegExp() and match() do not read an uninitialised value.
+    match_data = NULL;
    return;
  }
 #ifdef PCRE_INFO_CAPTURECOUNT
  // Get number of subpatterns that will be returned
-  int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns);
+  int rc = pcre2_pattern_info(pcregex, PCRE2_INFO_CAPTURECOUNT, &nrSubPatterns);
  if (rc != 0)
-#endif
+  {
    nrSubPatterns = 0; // fallback. We always need the first pair of offsets.
  }
-#else /* HAVE_PCREPOSIX */
+  match_data = pcre2_match_data_create_from_pattern(pcregex, NULL);
 #else
  int regflags = 0;
 #ifdef REG_EXTENDED
@ -195,9 +192,15 @@ RegExp::RegExp(const UString &p, int f)
 RegExp::~RegExp()
 {
  doneMatch(); // Be 100% sure buffers are freed
-#ifdef HAVE_PCREPOSIX
+#ifdef HAVE_PCRE2POSIX
  if (match_data)
  {
    pcre2_match_data_free(match_data);
  }
  if (pcregex)
-    pcre_free(pcregex);
+  {
    pcre2_code_free(pcregex);
  }
 #else
  /* TODO: is this really okay after an error ? */
  regfree(&preg);
@ -208,7 +211,7 @@ void RegExp::prepareUtf8(const UString& s)
 {
  // Allocate a buffer big enough to hold all the characters plus \0
  const int length = s.size();
-  buffer = new char[length * 3 + 1];
+  buffer = new buftype_t[length * 3 + 1];
  // Also create buffer for positions. We need one extra character in there,
  // even past the \0 since the non-empty handling may jump one past the end
@ -217,7 +220,7 @@ void RegExp::prepareUtf8(const UString& s)
  // Convert to runs of 8-bit characters, and generate indeces
  // Note that we do NOT combine surrogate pairs here, as 
  // regexps operate on them as separate characters
-  char *p      = buffer;
+  buftype_t *p = buffer;
  int  *posOut = originalPos;
  const UChar *d = s.data();
  for (int i = 0; i != length; ++i) {
@ -225,16 +228,16 @@ void RegExp::prepareUtf8(const UString& s)
    int sequenceLen;
    if (c < 0x80) {
-      *p++ = (char)c;
+      *p++ = (buftype_t)c;
      sequenceLen = 1;
    } else if (c < 0x800) {
-      *p++ = (char)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8
+      *p++ = (buftype_t)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8
-      *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
+      *p++ = (buftype_t)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
      sequenceLen = 2;
    } else {
-      *p++ = (char)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8
+      *p++ = (buftype_t)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8
-      *p++ = (char)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set
+      *p++ = (buftype_t)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set
-      *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
+      *p++ = (buftype_t)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
      sequenceLen = 3;
    }
@ -262,7 +265,7 @@ void RegExp::prepareASCII (const UString& s)
  // when we don't have utf 8 available -- use 
  // truncated version, and pray for the best 
  CString truncated = s.cstring();
-  buffer = new char[truncated.size() + 1];
+  buffer = new buftype_t[truncated.size() + 1];
  memcpy(buffer, truncated.c_str(), truncated.size());
  buffer[truncated.size()] = '\0'; // For _compile use
  bufferSize = truncated.size();
@ -272,11 +275,9 @@ void RegExp::prepareMatch(const UString &s)
 {
  delete[] originalPos; // Just to be sure..
  delete[] buffer;
 #ifdef PCRE_CONFIG_UTF8
  if (utf8Support == Supported)
    prepareUtf8(s);
  else
 #endif
    prepareASCII(s);
 #ifndef NDEBUG
@ -308,17 +309,16 @@ UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
  if (i > s.size() || s.isNull())
    return UString::null;
-#ifdef HAVE_PCREPOSIX
+#ifdef HAVE_PCRE2POSIX
-  int ovecsize = (nrSubPatterns+1)*3; // see pcre docu
+  if (!pcregex || !match_data)
-  if (ovector) *ovector = new int[ovecsize];
+    return UString::null;
-  if (!pcregex)
+  if (!ovector)
    return UString::null;
  int startPos;
  int nextPos;
-
+  if (utf8Support == Supported)
-#ifdef PCRE_CONFIG_UTF8
+  {
  if (utf8Support == Supported) {
    startPos = i;
    while (originalPos[startPos] < i)
      ++startPos;
@ -328,53 +328,59 @@ UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
      while (originalPos[nextPos] < (i + 1))
        ++nextPos;
    }
-  } else
+  }
-#endif
+  else
  {
    startPos = i;
    nextPos  = i + (i < s.size() ? 1 : 0);
  }
-  int baseFlags =
+  uint32_t baseFlags = (utf8Support == Supported ? PCRE2_NO_UTF_CHECK : 0);
-#ifdef PCRE_CONFIG_UTF8
+  if (m_notEmpty)
-    utf8Support == Supported ? PCRE_NO_UTF8_CHECK :
+  {
-#endif
+    baseFlags |= PCRE2_NOTEMPTY | PCRE2_ANCHORED;
-    0;
+  }
-  int numMatches = pcre_exec(pcregex, NULL, buffer, bufferSize, startPos,
+  // Pass the explicit subject length (as the pcre_exec call did) so subjects containing NUL are not truncated.
+  int numMatches = pcre2_match(pcregex, buffer, bufferSize, startPos, baseFlags, match_data, NULL);
-                             m_notEmpty ? (PCRE_NOTEMPTY | PCRE_ANCHORED | baseFlags) : baseFlags, // see man pcretest
+  if (numMatches <= 0)
                             ovector ? *ovector : 0L, ovecsize);
  if (numMatches < 0)
  {
    // Failed to match.
-    if (numMatches == PCRE_ERROR_NOMATCH && (flgs & Global) && m_notEmpty && ovector && startPos < nextPos)
+    if (numMatches == PCRE2_ERROR_NOMATCH && (flgs & Global) && m_notEmpty && startPos < nextPos)
    {
      // We set m_notEmpty ourselves, to look for a non-empty match
      // (see man pcretest or pcretest.c for details).
      // So we don't stop here, we want to try again at i+1.
 #ifdef KJS_VERBOSE
      fprintf(stderr, "No match after m_notEmpty. +1 and keep going.\n");
 #endif
      m_notEmpty = 0;
-      numMatches = pcre_exec(pcregex, NULL, buffer, bufferSize, nextPos, baseFlags,
+      baseFlags = (utf8Support == Supported ? PCRE2_NO_UTF_CHECK : 0);
-                             ovector ? *ovector : 0L, ovecsize);
+      // Retry one position further; keep the explicit subject length so embedded NULs are matched.
+      numMatches = pcre2_match(pcregex, buffer, bufferSize, nextPos, baseFlags, match_data, NULL);
-      if (numMatches < 0)
+      if (numMatches <= 0)
        return UString::null;
    }
-    else // done
+    else
      return UString::null;
  }
-  // Got a match, proceed with it.
+  PCRE2_SIZE *pcre2_ovector = pcre2_get_ovector_pointer(match_data);
-  // But fix up the ovector if need be..
+  if (!pcre2_ovector)
-  if (ovector && originalPos) {
+    return UString::null;
-    for (unsigned c = 0; c < 2 * TQMIN((unsigned)numMatches, nrSubPatterns+1); ++c) {
+
-      if ((*ovector)[c] != -1)
+  uint32_t pcre2_ovecCount = pcre2_get_ovector_count(match_data);
-        (*ovector)[c] = originalPos[(*ovector)[c]];
+  *ovector = new int[pcre2_ovecCount * 2];
  if (originalPos)
  {
    for (size_t c = 0; c < 2 * pcre2_ovecCount; ++c)
    {
      (*ovector)[c] = (pcre2_ovector[c] != -1) ? originalPos[pcre2_ovector[c]] : -1;
    }
  }
  else
  {
    for (size_t c = 0; c < 2 * pcre2_ovecCount; ++c)
    {
      (*ovector)[c] = pcre2_ovector[c];
    }
  }
  if (!ovector)
    return UString::null; // don't rely on the return value if you pass ovector==0
 #else
  const uint maxMatch = 10;
  regmatch_t rmatch[maxMatch];
@ -419,28 +425,3 @@ UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
  }
  return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);
 }
 #if 0 // unused
 bool RegExp::test(const UString &s, int)
 {
 #ifdef HAVE_PCREPOSIX
  int ovector[300];
  CString buffer(s.cstring());
  if (s.isNull() ||
      pcre_exec(pcregex, NULL, buffer.c_str(), buffer.size(), 0,
 		0, ovector, 300) == PCRE_ERROR_NOMATCH)
    return false;
  else
    return true;
 #else
  char *str = strdup(s.ascii());
  int r = regexec(&preg, str, 0, 0, 0);
  free(str);
  return r == 0;
 #endif
 }
 #endif
--- a/kjs/regexp.h
+++ b/kjs/regexp.h
@ -25,13 +25,16 @@
 #include "config.h"
-#ifdef HAVE_PCREPOSIX
+#ifdef HAVE_PCRE2POSIX
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
 #include <pcre2.h>
 typedef PCRE2_UCHAR8 buftype_t; 
 #else  // POSIX regex - not so good...
 extern "C" { // bug with some libc5 distributions
 #include <regex.h>
 typedef char buftype_t; 
 }
-#endif //HAVE_PCREPOSIX
+#endif
 #include "ustring.h"
@ -61,7 +64,7 @@ namespace KJS {
    bool valid;
    // Cached encoding info...
-    char* buffer;
+    buftype_t *buffer;
    int*  originalPos;
    int   bufferSize;
@ -71,22 +74,20 @@ namespace KJS {
    UString originalS; // the original string, used for sanity-checking
 #endif
-#ifndef HAVE_PCREPOSIX
+#ifndef HAVE_PCRE2POSIX
    regex_t preg;
 #else
-    pcre *pcregex;
+    pcre2_code *pcregex;
    pcre2_match_data *match_data;
    enum UTF8SupportState {
      Unknown,
      Supported,
      Unsupported
    };
 #ifdef PCRE_CONFIG_UTF8
    static UTF8SupportState utf8Support;
 #endif
-#endif
+    uint32_t nrSubPatterns;
    unsigned int nrSubPatterns;
    RegExp();
  };