Logo Search packages:      
Sourcecode: zoem version File versions

op-inspect.c

/*      Copyright (C) 2001, 2002, 2003, 2004 Stijn van Dongen
 *
 * This file is part of Zoem. You can redistribute and/or modify Zoem under the
 * terms of the GNU General Public License;  either version 2 of the License or
 * (at your option) any later  version.  You should have received a copy of the
 * GPL along with Zoem, in the file COPYING.
*/

/* this is a redundant piece of code. I duplicated it because I want
 * to see whether inspect2 really needs a new implementation.
 * As yet, no work done.
*/

#include "op-inspect.h"

#include <ctype.h>
#include <stdlib.h>
#include <regex.h>

#include "util.h"
#include "digest.h"
#include "key.h"
#include "curly.h"
#include "segment.h"
#include "read.h"
#include "parse.h"
#include "iface.h"

#include "util/ting.h"
#include "util/ding.h"
#include "util/err.h"


/*
 *  Someone should take the time and refactor this code, or rewrite it from
 *  scratch. It also needs deciding and documenting which options go together
 *  and which do not.  Refactoring should be done wrt matching, building up the
 *  new string, and the vargwise branches. The current code is not very bad I
 *  hope, but it's certainly not satisfactory.
*/

/*
 *  Regex-driven inspection/substitution over a block of text.
 *
 *  Inputs are copied from the globals arg1_g .. arg4_g, in that order:
 *  modifiers, pattern, substitution, data. Each copy is passed through
 *  yamDigest before use; the pattern is additionally passed to yamUnprotect.
 *
 *  Modifiers are detected by substring search (strstr) in the first argument:
 *     posix    mandatory; without it the call fails
 *     icase    adds REG_ICASE
 *     dotall   REG_NEWLINE is set unless this is present
 *     lines    process the data one '\n'-delimited line at a time
 *     vararg   process the data one curly-delimited scope at a time
 *     skip     do not copy the non-matching parts
 *     look     append the matched text itself rather than the substitution;
 *              forces knmp on and count off, and disables interpolation
 *     count    write the number of hits rather than text
 *     snip     do not keep empty results
 *     grepc    keep nonmatching lines/scopes  (forces count, keepempty off)
 *     grep     keep matching lines/scopes     (forces count, keepempty off)
 *
 *  A substitution of the form _#<digit>{...} is treated as an anonymous key
 *  taking <digit> subpattern arguments; it is expanded with yamExpandKey for
 *  every match.
 *
 *  Returns seg with SEGMENT_ERROR set on failure, otherwise the result of
 *  yamSegPush(seg, new) where new holds the text that was built.
 *
 *  NOTE(review): the failure path may be entered after mods/pat/sub were
 *  already freed; this presumably relies on mcxTingFree resetting the handle
 *  and tolerating NULL -- confirm against util/ting.
*/

yamSeg*  yamInspect4
(  yamSeg* seg
)
   {  mcxTing* mods        =  mcxTingNew(arg1_g->str)
   ;  mcxTing* pat         =  mcxTingNew(arg2_g->str)
   ;  mcxTing* sub         =  mcxTingNew(arg3_g->str)
   ;  mcxTing* data        =  mcxTingNew(arg4_g->str)
   ;  mcxTing* new         =  mcxTingEmpty(NULL, data->len)
   ;  int      regflags    =  REG_EXTENDED
   ;  int      n_hits      =  0
   ;  int      n_written   =  0
   ;  const char* me       =  "\\inspect#2"

   ;  mcxbool  grep        =  FALSE       /* only keep matching lines/scopes */
   ;  mcxbool  count       =  FALSE       /* only count matches              */
   ;  mcxbool  keepempty   =  TRUE        /* keep empty results              */
   ;  mcxbool  grepc       =  FALSE       /* keep nonmatching lines/scopes   */
   ;  mcxbool  interpolate =  FALSE       /* interpolate subpatterns         */
   ;  mcxbool  knmp        =  TRUE        /* keep non matching part          */
   ;  mcxbool  linewise    =  FALSE       /* repeat for each line            */
   ;  mcxbool  vargwise    =  FALSE       /* repeat for each vararg          */
   ;  mcxbool  partwise    =  FALSE       /* vargwise or linewise            */
   ;  mcxbool  look        =  FALSE       /* don't touch                     */
   ;  mcxbool  reg_ok      =  FALSE       /* reg compiled, needs regfree     */

            /* A..Z bound the whole data string, a..z bound the current part
             * (line or vararg scope), p is the current scan position.
             * nl and cc mark the end of the current line resp. scope.
            */
   ;  char     *A, *a, *p, *nl = NULL, *cc = NULL, *z, *Z
   ;  regex_t  reg
            /* slot 0 is the whole match, slots 1..9 the subpatterns; k below
             * is a single digit so it never indexes beyond slot 9.
            */
   ;  regmatch_t  matchpats[10]

   ;  int      rc, k = 0, i

   ;  if
      (  yamDigest(data, data, seg)
      || yamDigest(mods, mods, seg)
      || yamDigest(pat, pat, seg)
      || yamDigest(sub,sub, seg)
      )
      {
            /* Shared failure exit; later code jumps into this block.
             * The regex is released here only if regcomp succeeded before
             * the jump, so that no error path leaks it.
            */
      default_fail
         :
         seg->flags |= SEGMENT_ERROR
      ;  if (reg_ok)
         regfree(&reg)
      ;  mcxTingFree(&data)
      ;  mcxTingFree(&mods)
      ;  mcxTingFree(&pat)
      ;  mcxTingFree(&sub)
      ;  mcxTingFree(&new)
      ;  return seg
   ;  }

      yamUnprotect(pat)

   ;  if (!strstr(mods->str, "posix"))
      {  yamErr
         (me, "Only 'posix' regexes available, found <%s>", mods->str)
      ;  goto default_fail
   ;  }

      if (strstr(mods->str, "icase"))
      regflags |= REG_ICASE
   ;  if (!strstr(mods->str, "dotall"))
      regflags |= REG_NEWLINE
   ;  if (strstr(mods->str, "lines"))
      linewise = TRUE
   ;  if (strstr(mods->str, "vararg"))
      vargwise = TRUE
   ;  if (strstr(mods->str, "skip"))
      knmp = FALSE
   ;  if (strstr(mods->str, "look"))
      look = TRUE
   ;  if (strstr(mods->str, "count"))
      count = TRUE
   ;  if (strstr(mods->str, "snip"))
      keepempty = FALSE

            /* "grep" is a substring of "grepc", so grepc must be tested first */
   ;  if (strstr(mods->str, "grepc"))
      grepc = TRUE
   ;  else if (strstr(mods->str, "grep"))
      grep = TRUE

   ;  partwise = vargwise || linewise

   ;  if (look)
      {  knmp = TRUE
      ;  count = FALSE
   ;  }

      if (grepc || grep)
      {  count = FALSE
      ;  keepempty = FALSE
   ;  }

      if (regcomp(&reg, pat->str, regflags))
      {  yamErr(me, "regex <%s> did not compile", pat->str)
      ;  goto default_fail
   ;  }

      reg_ok = TRUE

            /* Anonymous key: sub looks like _#<digit>{...}.  The closing
             * curly must be the very last character (rc+4 == sub->len).
             * presumably rc is the offset of the closing curly relative to
             * the opening one at position 3 -- confirm against curly.h.
            */
   ;  if (!look && !strncmp(sub->str, "_#", 2) && isdigit((unsigned char) *(sub->str+2)))
      {  interpolate  =  TRUE
      ;  k  =  (unsigned char) *(sub->str+2) - '0'
      ;  rc =  yamClosingCurly(sub, 3, NULL, RETURN_ON_FAIL)

      ;  if (rc < 0)
         {  yamErr(me, "unexpected error in anonymous sub part")
         ;  goto default_fail
      ;  }
         else if (k > 9 || rc+4 != sub->len)
         {  yamErr(me, "anonymous sub <%s> not ok", sub->str)
         ;  goto default_fail
      ;  }

            /* key is the three characters "_#<digit>", first argument is the
             * text between the curlies; the k subpatterns are filled in per
             * match further below.
            */
         mcxTingNWrite(key_g, sub->str, 3)
      ;  n_args_g = k+1
      ;  mcxTingNWrite(arg1_g, sub->str+4, rc-1)
   ;  }
      else
      {  mcxTingWrite(key_g, sub->str)
   ;  }

      mcxTingFree(&sub)
   ;  mcxTingFree(&mods)
   ;  mcxTingFree(&pat)

  /* ******************************************************************* */
  /*     so far so good, let's rumble                                    */
  /* ******************************************************************* */

   ;  A  =  data->str
   ;  Z  =  data->str + data->len
   ;  p  =  A
   ;  a  =  A
   ;  z  =  Z
   ;  n_written = 0

            /* start one before the data so the first iteration opens a part */
   ;  if (vargwise)
      cc =  p-1
   ;  else if (linewise)
      nl =  p-1

   ;  while(p<=Z)
      {
         int so, eo, len
      ;  mcxbool match, matcheos, some, nil, now, nostring, status

            /* scan position moved past the current scope: open the next one */
      ;  if (vargwise && p > cc)
         {
            cc++
                  /* cast: passing a negative plain char to isspace is undefined */
         ;  while(isspace((unsigned char) *cc))
            cc++
         ;  if (cc<Z)
            {  rc  =  yamClosingCurly(data, cc-A, NULL, RETURN_ON_FAIL)
            ;  if (rc<0)
               {  yamErr(me, "data does not parse as vararg")
               ;  goto default_fail
            ;  }
               p     =  cc+1
            ;  cc   +=  rc
                  /* terminate the scope in place so regexec stops there */
            ;  *cc   =  '\0'
            ;  a     =  p
            ;  z     =  cc
            ;  n_hits=  0
            ;  mcxTingAppend(new, "{")
            ;  n_written = new->len
         ;  }
            else
            break
      ;  }
            /* scan position moved past the current line: open the next one */
         else if (linewise && p > nl)
         {  if (nl+1<Z)
            {  p  =  nl+1
            ;  a  =  p
            ;  if ((nl=strchr(p, '\n')))
               {  *nl   =  '\0'
               ;  n_hits=  0
               ;  z     =  nl
            ;  }
               else
               {  nl    =  Z
               ;  z     =  Z
            ;  }
               n_written = new->len
         ;  }
            else
            break
      ;  }

         status      =  regexec(&reg, p, 10, matchpats, 0)
      ;  match       =  !status
      ;  so          =  match ? matchpats[0].rm_so : -1
      ;  eo          =  match ? matchpats[0].rm_eo : -1
      ;  len         =  eo - so
      ;  some        =  n_hits                  /* earlier hit in this part  */
      ;  nil         =  !len                    /* empty match               */
      ;  now         =  !so                     /* match starts at p         */
      ;  nostring    =  a == z                  /* current part is empty     */
      ;  matcheos    =  match && (p + eo ==z)   /* match ends at end of part */


      ;  if (tracing_g & ZOEM_TRACE_REGEX)
         printf
         (  "%s"
            "hit now nil   p pso peo  so  eo len mtc eos\n"
            "%3d %3d %3d %3c %3c %3c %3d %3d %3d %3d %3d\n"
         ,  n_hits == 0 ? "\n" : ""
         ,  n_hits, now, nil
         ,  !p || !*p ? '-' : *p == '\n' ? 'N' : *p
         ,  !match ? '-' : !*(p+so) ? '-' : *(p+so) == '\n' ? 'N' : *(p+so)
         ,  !match ? '-' : !*(p+eo) ? '-' : *(p+eo) == '\n' ? 'N' : *(p+eo)
         ,  so, eo, len, match, matcheos
         )

            /* grepc: a part is copied whole if it does not match, then the
             * scan jumps straight to the next part.
            */
      ;  if (grepc)
         {  
            if (!match)
            mcxTingAppend(new, p)

                  /* nothing was written for this scope: retract its "{" */
         ;  if (!keepempty && (vargwise && n_written == new->len))
               mcxTingShrink(new, new->len-1)
            ,  n_written--

         ;  if (partwise && (n_written<new->len || keepempty))
            mcxTingAppend(new, linewise ? "\n" : vargwise ? "}" : "")

         ;  p=z+1
         ;  continue
      ;  }

            /* if no match, possibly output final nmp */
                  /* ignore if (!some && grep) */

         if (!match && !count && knmp && (some || !grep))
         mcxTingAppend(new, p)

      ;  if (match)
         n_hits++

      ;  if (match && !count)
         {
                  /* text between the scan position and the match */
            if (knmp)
            mcxTingNAppend(new, p, so)

         ;  if (interpolate)
            {  yamSeg* rowseg = NULL
                  /* subpattern i goes to slot i+1 of key_and_args_g;
                   * presumably slot 1 is arg1_g (the key body) -- confirm.
                  */
            ;  for (i=1;i<=k;i++)
               {  
                  if (matchpats[i].rm_so < 0)
                     mcxWarn(me, "no atom <%d>", i)
                  ,  mcxTingEmpty(key_and_args_g+i+1, 0)
               ;  else
                  mcxTingNWrite
                  (  key_and_args_g+i+1
                  ,  p+matchpats[i].rm_so
                  ,  matchpats[i].rm_eo - matchpats[i].rm_so
                  )
            ;  }
               if ((rowseg = yamExpandKey(seg)))
                  mcxTingAppend(new,rowseg->txt->str)
               ,  yamSegFree(&rowseg)
         ;  }
            else if (look)
            mcxTingNAppend(new, p+so, len)
         ;  else
            mcxTingAppend(new, key_g->str)

                  /* an empty match consumes no input; copy the character at
                   * the match position since p is stepped past it below.
                  */
         ;  if (nil && !nostring && *(p+so) && knmp)
            mcxTingNAppend(new, p+so, 1)
      ;  }

         else if ((!match || matcheos) && count)
         {  mcxTingInteger(new, n_hits)
         ;  p = z+1
      ;  }

            /* scope finished with nothing written: retract its "{" */
         if
         (  vargwise
         && (!match || matcheos)
         && (n_written==new->len)
         && !keepempty
         )
         {  mcxTingShrink(new, new->len-1)
         ;  n_written = new->len
      ;  }

            /* advance: past the match, plus one extra position when the match
             * was empty, ended the part, or ended at offset zero.
            */
         if (match)
         p +=  (eo && !matcheos && !nil) ? eo : eo + 1
      ;  else
         p  = z+1

            /* part finished: emit its terminator */
      ;  if
         (  partwise
         && (!match || matcheos)
         && (n_written<new->len || keepempty)
         )
         mcxTingAppend(new, linewise ? "\n" : vargwise ? "}" : "")
   ;  }

      mcxTingFree(&data)
   ;  regfree(&reg)

   ;  return yamSegPush(seg, new)
;  }


Generated by  Doxygen 1.6.0   Back to index