Changeset View
Changeset View
Standalone View
Standalone View
contrib/mg/re_search.c
- This file was added.
| /* $OpenBSD: re_search.c,v 1.36 2021/04/22 19:50:55 lum Exp $ */ | |||||
| /* This file is in the public domain. */ | |||||
| /* | |||||
| * regular expression search commands for Mg | |||||
| * | |||||
| * This file contains functions to implement several of gnuemacs's regular | |||||
| * expression functions for Mg. Several of the routines below are just minor | |||||
| * re-arrangements of Mg's non-regular expression search functions. Some of | |||||
| * them are similar in structure to the original MicroEMACS, others are | |||||
| * modifications of Rich Ellison's code. Peter Newton re-wrote about half of | |||||
| * them from scratch. | |||||
| */ | |||||
| #ifdef REGEX | |||||
| #include <sys/queue.h> | |||||
| #include <sys/types.h> | |||||
| #include <regex.h> | |||||
| #include <signal.h> | |||||
| #include <stdio.h> | |||||
| #include <string.h> | |||||
| #include "def.h" | |||||
| #include "macro.h" | |||||
| #define SRCH_BEGIN (0) /* search sub-codes */ | |||||
| #define SRCH_FORW (-1) | |||||
| #define SRCH_BACK (-2) | |||||
| #define SRCH_NOPR (-3) | |||||
| #define SRCH_ACCM (-4) | |||||
| #define SRCH_MARK (-5) | |||||
| #define RE_NMATCH 10 /* max number of matches */ | |||||
| #define REPLEN 256 /* max length of replacement string */ | |||||
| char re_pat[NPAT]; /* regex pattern */ | |||||
| int re_srch_lastdir = SRCH_NOPR; /* last search flags */ | |||||
| int casefoldsearch = TRUE; /* does search ignore case? */ | |||||
| static int re_doreplace(RSIZE, char *); | |||||
| static int re_forwsrch(void); | |||||
| static int re_backsrch(void); | |||||
| static int re_readpattern(char *); | |||||
| static int killmatches(int); | |||||
| static int countmatches(int); | |||||
| /* | |||||
| * Search forward. | |||||
| * Get a search string from the user and search for it starting at ".". If | |||||
| * found, move "." to just after the matched characters. display does all | |||||
| * the hard stuff. If not found, it just prints a message. | |||||
| */ | |||||
| /* ARGSUSED */ | |||||
| int | |||||
| re_forwsearch(int f, int n) | |||||
| { | |||||
| int s; | |||||
| if ((s = re_readpattern("RE Search")) != TRUE) | |||||
| return (s); | |||||
| if (re_forwsrch() == FALSE) { | |||||
| dobeep(); | |||||
| ewprintf("Search failed: \"%s\"", re_pat); | |||||
| return (FALSE); | |||||
| } | |||||
| re_srch_lastdir = SRCH_FORW; | |||||
| return (TRUE); | |||||
| } | |||||
| /* | |||||
| * Reverse search. | |||||
| * Get a search string from the user, and search, starting at "." | |||||
| * and proceeding toward the front of the buffer. If found "." is left | |||||
| * pointing at the first character of the pattern [the last character that | |||||
| * was matched]. | |||||
| */ | |||||
| /* ARGSUSED */ | |||||
| int | |||||
| re_backsearch(int f, int n) | |||||
| { | |||||
| int s; | |||||
| if ((s = re_readpattern("RE Search backward")) != TRUE) | |||||
| return (s); | |||||
| if (re_backsrch() == FALSE) { | |||||
| dobeep(); | |||||
| ewprintf("Search failed: \"%s\"", re_pat); | |||||
| return (FALSE); | |||||
| } | |||||
| re_srch_lastdir = SRCH_BACK; | |||||
| return (TRUE); | |||||
| } | |||||
| /* | |||||
| * Search again, using the same search string and direction as the last search | |||||
| * command. The direction has been saved in "srch_lastdir", so you know which | |||||
| * way to go. | |||||
| * | |||||
| * XXX: This code has problems -- some incompatibility(?) with extend.c causes | |||||
| * match to fail when it should not. | |||||
| */ | |||||
| /* ARGSUSED */ | |||||
| int | |||||
| re_searchagain(int f, int n) | |||||
| { | |||||
| if (re_srch_lastdir == SRCH_NOPR) { | |||||
| dobeep(); | |||||
| ewprintf("No last search"); | |||||
| return (FALSE); | |||||
| } | |||||
| if (re_srch_lastdir == SRCH_FORW) { | |||||
| if (re_forwsrch() == FALSE) { | |||||
| dobeep(); | |||||
| ewprintf("Search failed: \"%s\"", re_pat); | |||||
| return (FALSE); | |||||
| } | |||||
| return (TRUE); | |||||
| } | |||||
| if (re_srch_lastdir == SRCH_BACK) | |||||
| if (re_backsrch() == FALSE) { | |||||
| dobeep(); | |||||
| ewprintf("Search failed: \"%s\"", re_pat); | |||||
| return (FALSE); | |||||
| } | |||||
| return (TRUE); | |||||
| } | |||||
| /* Compiled regex goes here-- changed only when new pattern read */ | |||||
| static regex_t regex_buff; | |||||
| static regmatch_t regex_match[RE_NMATCH]; | |||||
| /* | |||||
| * Re-Query Replace. | |||||
| * Replace strings selectively. Does a search and replace operation. | |||||
| */ | |||||
| /* ARGSUSED */ | |||||
| int | |||||
| re_queryrepl(int f, int n) | |||||
| { | |||||
| int rcnt = 0; /* replacements made so far */ | |||||
| int plen, s; /* length of found string */ | |||||
| char news[NPAT]; /* replacement string */ | |||||
| if ((s = re_readpattern("RE Query replace")) != TRUE) | |||||
| return (s); | |||||
| if (eread("Query replace %s with: ", news, NPAT, | |||||
| EFNUL | EFNEW | EFCR, re_pat) == NULL) | |||||
| return (ABORT); | |||||
| ewprintf("Query replacing %s with %s:", re_pat, news); | |||||
| /* | |||||
| * Search forward repeatedly, checking each time whether to insert | |||||
| * or not. The "!" case makes the check always true, so it gets put | |||||
| * into a tighter loop for efficiency. | |||||
| */ | |||||
| while (re_forwsrch() == TRUE) { | |||||
| retry: | |||||
| update(CMODE); | |||||
| switch (getkey(FALSE)) { | |||||
| case ' ': | |||||
| plen = regex_match[0].rm_eo - regex_match[0].rm_so; | |||||
| if (re_doreplace((RSIZE)plen, news) == FALSE) | |||||
| return (FALSE); | |||||
| rcnt++; | |||||
| break; | |||||
| case '.': | |||||
| plen = regex_match[0].rm_eo - regex_match[0].rm_so; | |||||
| if (re_doreplace((RSIZE)plen, news) == FALSE) | |||||
| return (FALSE); | |||||
| rcnt++; | |||||
| goto stopsearch; | |||||
| case CCHR('G'): /* ^G */ | |||||
| (void)ctrlg(FFRAND, 0); | |||||
| goto stopsearch; | |||||
| case CCHR('['): /* ESC */ | |||||
| case '`': | |||||
| goto stopsearch; | |||||
| case '!': | |||||
| do { | |||||
| plen = regex_match[0].rm_eo - regex_match[0].rm_so; | |||||
| if (re_doreplace((RSIZE)plen, news) == FALSE) | |||||
| return (FALSE); | |||||
| rcnt++; | |||||
| } while (re_forwsrch() == TRUE); | |||||
| goto stopsearch; | |||||
| case CCHR('?'): /* To not replace */ | |||||
| break; | |||||
| default: | |||||
| ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit"); | |||||
| goto retry; | |||||
| } | |||||
| } | |||||
| stopsearch: | |||||
| curwp->w_rflag |= WFFULL; | |||||
| update(CMODE); | |||||
| if (!inmacro) { | |||||
| if (rcnt == 0) | |||||
| ewprintf("(No replacements done)"); | |||||
| else if (rcnt == 1) | |||||
| ewprintf("(1 replacement done)"); | |||||
| else | |||||
| ewprintf("(%d replacements done)", rcnt); | |||||
| } | |||||
| return (TRUE); | |||||
| } | |||||
| int | |||||
| re_repl(int f, int n) | |||||
| { | |||||
| int rcnt = 0; /* replacements made so far */ | |||||
| int plen, s; /* length of found string */ | |||||
| char news[NPAT]; /* replacement string */ | |||||
| if ((s = re_readpattern("RE Replace")) != TRUE) | |||||
| return (s); | |||||
| if (eread("Replace %s with: ", news, NPAT, | |||||
| EFNUL | EFNEW | EFCR, re_pat) == NULL) | |||||
| return (ABORT); | |||||
| while (re_forwsrch() == TRUE) { | |||||
| plen = regex_match[0].rm_eo - regex_match[0].rm_so; | |||||
| if (re_doreplace((RSIZE)plen, news) == FALSE) | |||||
| return (FALSE); | |||||
| rcnt++; | |||||
| } | |||||
| curwp->w_rflag |= WFFULL; | |||||
| update(CMODE); | |||||
| if (!inmacro) | |||||
| ewprintf("(%d replacement(s) done)", rcnt); | |||||
| return(TRUE); | |||||
| } | |||||
| /* | |||||
| * Routine re_doreplace calls lreplace to make replacements needed by | |||||
| * re_query replace. Its reason for existence is to deal with \1, \2. etc. | |||||
| * plen: length to remove | |||||
| * st: replacement string | |||||
| */ | |||||
| static int | |||||
| re_doreplace(RSIZE plen, char *st) | |||||
| { | |||||
| int j, k, s, more, num, state; | |||||
| struct line *clp; | |||||
| char repstr[REPLEN]; | |||||
| clp = curwp->w_dotp; | |||||
| more = TRUE; | |||||
| j = 0; | |||||
| state = 0; | |||||
| num = 0; | |||||
| /* The following FSA parses the replacement string */ | |||||
| while (more) { | |||||
| switch (state) { | |||||
| case 0: | |||||
| if (*st == '\\') { | |||||
| st++; | |||||
| state = 1; | |||||
| } else if (*st == '\0') | |||||
| more = FALSE; | |||||
| else { | |||||
| repstr[j] = *st; | |||||
| j++; | |||||
| if (j >= REPLEN) | |||||
| return (FALSE); | |||||
| st++; | |||||
| } | |||||
| break; | |||||
| case 1: | |||||
| if (*st >= '0' && *st <= '9') { | |||||
| num = *st - '0'; | |||||
| st++; | |||||
| state = 2; | |||||
| } else if (*st == '\0') | |||||
| more = FALSE; | |||||
| else { | |||||
| repstr[j] = *st; | |||||
| j++; | |||||
| if (j >= REPLEN) | |||||
| return (FALSE); | |||||
| st++; | |||||
| state = 0; | |||||
| } | |||||
| break; | |||||
| case 2: | |||||
| if (*st >= '0' && *st <= '9') { | |||||
| num = 10 * num + *st - '0'; | |||||
| st++; | |||||
| } else { | |||||
| if (num >= RE_NMATCH) | |||||
| return (FALSE); | |||||
| k = regex_match[num].rm_eo - regex_match[num].rm_so; | |||||
| if (j + k >= REPLEN) | |||||
| return (FALSE); | |||||
| bcopy(&(clp->l_text[regex_match[num].rm_so]), | |||||
| &repstr[j], k); | |||||
| j += k; | |||||
| if (*st == '\0') | |||||
| more = FALSE; | |||||
| if (*st == '\\') { | |||||
| st++; | |||||
| state = 1; | |||||
| } else { | |||||
| repstr[j] = *st; | |||||
| j++; | |||||
| if (j >= REPLEN) | |||||
| return (FALSE); | |||||
| st++; | |||||
| state = 0; | |||||
| } | |||||
| } | |||||
| break; | |||||
| } /* switch (state) */ | |||||
| } /* while (more) */ | |||||
| repstr[j] = '\0'; | |||||
| s = lreplace(plen, repstr); | |||||
| return (s); | |||||
| } | |||||
| /* | |||||
| * This routine does the real work of a forward search. The pattern is | |||||
| * sitting in the external variable "pat". If found, dot is updated, the | |||||
| * window system is notified of the change, and TRUE is returned. If the | |||||
| * string isn't found, FALSE is returned. | |||||
| */ | |||||
| static int | |||||
| re_forwsrch(void) | |||||
| { | |||||
| int re_flags, tbo, tdotline, error; | |||||
| struct line *clp; | |||||
| clp = curwp->w_dotp; | |||||
| tbo = curwp->w_doto; | |||||
| tdotline = curwp->w_dotline; | |||||
| if (tbo == clp->l_used) | |||||
| /* | |||||
| * Don't start matching past end of line -- must move to | |||||
| * beginning of next line, unless line is empty or at | |||||
| * end of file. | |||||
| */ | |||||
| if (clp != curbp->b_headp && llength(clp) != 0) { | |||||
| clp = lforw(clp); | |||||
| tdotline++; | |||||
| tbo = 0; | |||||
| } | |||||
| /* | |||||
| * Note this loop does not process the last line, but this editor | |||||
| * always makes the last line empty so this is good. | |||||
| */ | |||||
| while (clp != (curbp->b_headp)) { | |||||
| re_flags = REG_STARTEND; | |||||
| if (tbo != 0) | |||||
| re_flags |= REG_NOTBOL; | |||||
| regex_match[0].rm_so = tbo; | |||||
| regex_match[0].rm_eo = llength(clp); | |||||
| error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", | |||||
| RE_NMATCH, regex_match, re_flags); | |||||
| if (error != 0) { | |||||
| clp = lforw(clp); | |||||
| tdotline++; | |||||
| tbo = 0; | |||||
| } else { | |||||
| curwp->w_doto = regex_match[0].rm_eo; | |||||
| curwp->w_dotp = clp; | |||||
| curwp->w_dotline = tdotline; | |||||
| curwp->w_rflag |= WFMOVE; | |||||
| return (TRUE); | |||||
| } | |||||
| } | |||||
| return (FALSE); | |||||
| } | |||||
| /* | |||||
| * This routine does the real work of a backward search. The pattern is sitting | |||||
| * in the external variable "re_pat". If found, dot is updated, the window | |||||
| * system is notified of the change, and TRUE is returned. If the string isn't | |||||
| * found, FALSE is returned. | |||||
| */ | |||||
| static int | |||||
| re_backsrch(void) | |||||
| { | |||||
| struct line *clp; | |||||
| int tbo, tdotline; | |||||
| regmatch_t lastmatch; | |||||
| clp = curwp->w_dotp; | |||||
| tbo = curwp->w_doto; | |||||
| tdotline = curwp->w_dotline; | |||||
| /* Start search one position to the left of dot */ | |||||
| tbo = tbo - 1; | |||||
| if (tbo < 0) { | |||||
| /* must move up one line */ | |||||
| clp = lback(clp); | |||||
| tdotline--; | |||||
| tbo = llength(clp); | |||||
| } | |||||
| /* | |||||
| * Note this loop does not process the last line, but this editor | |||||
| * always makes the last line empty so this is good. | |||||
| */ | |||||
| while (clp != (curbp->b_headp)) { | |||||
| regex_match[0].rm_so = 0; | |||||
| regex_match[0].rm_eo = llength(clp); | |||||
| lastmatch.rm_so = -1; | |||||
| /* | |||||
| * Keep searching until we don't match any longer. Assumes a | |||||
| * non-match does not modify the regex_match array. We have to | |||||
| * do this character-by-character after the first match since | |||||
| * POSIX regexps don't give you a way to do reverse matches. | |||||
| */ | |||||
| while (!regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", | |||||
| RE_NMATCH, regex_match, REG_STARTEND) && | |||||
| regex_match[0].rm_so <= tbo) { | |||||
| memcpy(&lastmatch, ®ex_match[0], sizeof(regmatch_t)); | |||||
| regex_match[0].rm_so++; | |||||
| regex_match[0].rm_eo = llength(clp); | |||||
| } | |||||
| if (lastmatch.rm_so == -1) { | |||||
| clp = lback(clp); | |||||
| tdotline--; | |||||
| tbo = llength(clp); | |||||
| } else { | |||||
| memcpy(®ex_match[0], &lastmatch, sizeof(regmatch_t)); | |||||
| curwp->w_doto = regex_match[0].rm_so; | |||||
| curwp->w_dotp = clp; | |||||
| curwp->w_dotline = tdotline; | |||||
| curwp->w_rflag |= WFMOVE; | |||||
| return (TRUE); | |||||
| } | |||||
| } | |||||
| return (FALSE); | |||||
| } | |||||
| /* | |||||
| * Read a pattern. | |||||
| * Stash it in the external variable "re_pat". The "pat" is | |||||
| * not updated if the user types in an empty line. If the user typed | |||||
| * an empty line, and there is no old pattern, it is an error. | |||||
| * Display the old pattern, in the style of Jeff Lomicka. There is | |||||
| * some do-it-yourself control expansion. | |||||
| */ | |||||
| static int | |||||
| re_readpattern(char *re_prompt) | |||||
| { | |||||
| static int dofree = 0; | |||||
| int flags, error, s; | |||||
| char tpat[NPAT], *rep; | |||||
| if (re_pat[0] == '\0') | |||||
| rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, re_prompt); | |||||
| else | |||||
| rep = eread("%s (default %s): ", tpat, NPAT, | |||||
| EFNUL | EFNEW | EFCR, re_prompt, re_pat); | |||||
| if (rep == NULL) | |||||
| return (ABORT); | |||||
| if (rep[0] != '\0') { | |||||
| /* New pattern given */ | |||||
| (void)strlcpy(re_pat, tpat, sizeof(re_pat)); | |||||
| if (casefoldsearch) | |||||
| flags = REG_EXTENDED | REG_ICASE; | |||||
| else | |||||
| flags = REG_EXTENDED; | |||||
| if (dofree) | |||||
| regfree(®ex_buff); | |||||
| error = regcomp(®ex_buff, re_pat, flags); | |||||
| if (error != 0) { | |||||
| char message[256]; | |||||
| regerror(error, ®ex_buff, message, sizeof(message)); | |||||
| dobeep(); | |||||
| ewprintf("Regex Error: %s", message); | |||||
| re_pat[0] = '\0'; | |||||
| return (FALSE); | |||||
| } | |||||
| dofree = 1; | |||||
| s = TRUE; | |||||
| } else if (rep[0] == '\0' && re_pat[0] != '\0') | |||||
| /* Just using old pattern */ | |||||
| s = TRUE; | |||||
| else | |||||
| s = FALSE; | |||||
| return (s); | |||||
| } | |||||
| /* | |||||
| * Cause case to not matter in searches. This is the default. If called | |||||
| * with argument cause case to matter. | |||||
| */ | |||||
| /* ARGSUSED*/ | |||||
| int | |||||
| setcasefold(int f, int n) | |||||
| { | |||||
| if (f & FFARG) { | |||||
| casefoldsearch = FALSE; | |||||
| ewprintf("Case-fold-search unset"); | |||||
| } else { | |||||
| casefoldsearch = TRUE; | |||||
| ewprintf("Case-fold-search set"); | |||||
| } | |||||
| /* | |||||
| * Invalidate the regular expression pattern since I'm too lazy to | |||||
| * recompile it. | |||||
| */ | |||||
| re_pat[0] = '\0'; | |||||
| return (TRUE); | |||||
| } | |||||
| /* | |||||
| * Delete all lines after dot that contain a string matching regex. | |||||
| */ | |||||
| /* ARGSUSED */ | |||||
| int | |||||
| delmatchlines(int f, int n) | |||||
| { | |||||
| int s; | |||||
| if ((s = re_readpattern("Flush lines (containing match for regexp)")) | |||||
| != TRUE) | |||||
| return (s); | |||||
| s = killmatches(TRUE); | |||||
| return (s); | |||||
| } | |||||
| /* | |||||
| * Delete all lines after dot that don't contain a string matching regex. | |||||
| */ | |||||
| /* ARGSUSED */ | |||||
| int | |||||
| delnonmatchlines(int f, int n) | |||||
| { | |||||
| int s; | |||||
| if ((s = re_readpattern("Keep lines (containing match for regexp)")) | |||||
| != TRUE) | |||||
| return (s); | |||||
| s = killmatches(FALSE); | |||||
| return (s); | |||||
| } | |||||
| /* | |||||
| * This function does the work of deleting matching lines. | |||||
| */ | |||||
| static int | |||||
| killmatches(int cond) | |||||
| { | |||||
| int s, error; | |||||
| int count = 0; | |||||
| struct line *clp; | |||||
| clp = curwp->w_dotp; | |||||
| if (curwp->w_doto == llength(clp)) | |||||
| /* Consider dot on next line */ | |||||
| clp = lforw(clp); | |||||
| while (clp != (curbp->b_headp)) { | |||||
| /* see if line matches */ | |||||
| regex_match[0].rm_so = 0; | |||||
| regex_match[0].rm_eo = llength(clp); | |||||
| error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", | |||||
| RE_NMATCH, regex_match, REG_STARTEND); | |||||
| /* Delete line when appropriate */ | |||||
| if ((cond == FALSE && error) || (cond == TRUE && !error)) { | |||||
| curwp->w_doto = 0; | |||||
| curwp->w_dotp = clp; | |||||
| count++; | |||||
| s = ldelete(llength(clp) + 1, KNONE); | |||||
| clp = curwp->w_dotp; | |||||
| curwp->w_rflag |= WFMOVE; | |||||
| if (s == FALSE) | |||||
| return (FALSE); | |||||
| } else | |||||
| clp = lforw(clp); | |||||
| } | |||||
| ewprintf("%d line(s) deleted", count); | |||||
| if (count > 0) | |||||
| curwp->w_rflag |= WFMOVE; | |||||
| return (TRUE); | |||||
| } | |||||
| /* | |||||
| * Count lines matching regex. | |||||
| */ | |||||
| /* ARGSUSED */ | |||||
| int | |||||
| cntmatchlines(int f, int n) | |||||
| { | |||||
| int s; | |||||
| if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE) | |||||
| return (s); | |||||
| s = countmatches(TRUE); | |||||
| return (s); | |||||
| } | |||||
| /* | |||||
| * Count lines that fail to match regex. | |||||
| */ | |||||
| /* ARGSUSED */ | |||||
| int | |||||
| cntnonmatchlines(int f, int n) | |||||
| { | |||||
| int s; | |||||
| if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE) | |||||
| return (s); | |||||
| s = countmatches(FALSE); | |||||
| return (s); | |||||
| } | |||||
| /* | |||||
| * This function does the work of counting matching lines. | |||||
| */ | |||||
| int | |||||
| countmatches(int cond) | |||||
| { | |||||
| int error; | |||||
| int count = 0; | |||||
| struct line *clp; | |||||
| clp = curwp->w_dotp; | |||||
| if (curwp->w_doto == llength(clp)) | |||||
| /* Consider dot on next line */ | |||||
| clp = lforw(clp); | |||||
| while (clp != (curbp->b_headp)) { | |||||
| /* see if line matches */ | |||||
| regex_match[0].rm_so = 0; | |||||
| regex_match[0].rm_eo = llength(clp); | |||||
| error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", | |||||
| RE_NMATCH, regex_match, REG_STARTEND); | |||||
| /* Count line when appropriate */ | |||||
| if ((cond == FALSE && error) || (cond == TRUE && !error)) | |||||
| count++; | |||||
| clp = lforw(clp); | |||||
| } | |||||
| if (cond) | |||||
| ewprintf("Number of lines matching: %d", count); | |||||
| else | |||||
| ewprintf("Number of lines not matching: %d", count); | |||||
| return (TRUE); | |||||
| } | |||||
| #endif /* REGEX */ | |||||