package LambdaTerm; require Exporter; @ISA = qw(Exporter); @EXPORT_OK = qw(simplify simplify_safe freevars); use bytes; # Usage (in another Perl program): # use LambdaTerm qw(simplify); # print simplify("(%x f(x))(3)"); # # See simplify script for a usage example for simplify_safe. # See buildattrs script for a usage example for freevars. # # Note: If LambdaTerm.pm is not in the current directory when you run # the other program, then it must be in a directory mentioned by your # PERLLIB path environment variable. # # Author: Jason Eisner , 2001-10-18, to support 600.465 HW3. # ---------------------------------------------------------------------- # This package provides the simplify function, which simplifies ("evaluates") # a lambda-calculus expression represented as a string. Some bells and # whistles are allowed in the expressions so that you can write fairly # natural-looking semantics. # # 1. Constants and variables are alphanumeric strings (you can also use # the characters _ and ' in these strings). # # 2. You can use commas in the natural way, as in "likes(y,x)". # This will be interpreted as likes(y)(x), or more precisely # (likes(y))(x). (Recall that a function of two arguments must be # interpreted as a "curried" function of this sort because formally we # only have one-argument functions in the lambda calculus.) # # 3. Symbol strings like ^ and -> are assumed to be infix functions, # so you can write "x ^ y" and not have it misinterpreted as meaning x(^,y). # (There is currently no way to bind definitions to these infix # functions though - they are just constant symbols.) # These infix functions have lower precedence than concatenation, # so x ^ y (z) is interpreted as x ^ (y(z)), which is usually what you want. # (If you do intend it as a ternary operator then you should use a # notation like x ^ y . z (just like "x ? y : z" in languages like C).) # # 4. % represents lambda, as in "%x unicorn(x)" or "% x unicorn(x)". # Note that while "unicorn" denotes a particular function, the # "x" is a dummy variable whose name is not important, and the # simplifier might have to rename it to avoid conflicts with # other variables. # # 5. You can invent other binding symbols ending in %, such as E% or # 3% or Exists% to represent the "backwards E" of # existential quantification: "possible(E% x unicorn(x))". Here # too x is a renameable dummy variable, rather than a constant, # and using the notation E% rather than just E lets the # simplifier know that. (It also makes it possible to leave out # the parentheses in this example, for what that's worth.) # # 6. You can use three kinds of parentheses if it helps you # read the formulas more easily: (), [], and {}. All three kinds # mean the same thing. # # 7. The simplifier tries to preserve as much as possible of your string's # original spacing and notation. For example, the following notations # are equivalent but are not interconverted: # f(x,y), f(x)(y), f x y, (f) [ x ] { y } # # 8. The simplifier actually does more than evaluation, since it reduces # the entire term to a normal form. This includes simplifying the # bodies of functions that have not applied to anything. Thus # %x ((%g g(x))(h)) simplifies to %x h(x). (But we do decline to simplify # further to just "h" since the user may like to show how many arguments # h is supposed to have and how they're expressed.) A similar package is # Christophe Raffalli's "Normaliser for Pure and Typed Lambda-Calculus," # available on the web. # # This is the untyped lambda-calculus, so even without constants you # can express any Turing-computable function, even though we only have # 200 lines of code here. The equivalence is fascinating and you # can ask me about it if you want! Languages that directly implement # the untyped lambda-calculus include Scheme, LISP, ML, and in fact Perl, # although that doesn't help us here since we can't print the internal # representation of a Perl function. # # Formally there are just 3 kinds of term: # # A simple constant: x, foo, --> # A binding term: %x foo(x), E%x foo(x), %foo foo(x) # A juxtaposition of two subterms: foo (x), (%x foo(x)) (a) # This is interpreted as applying the first subterm (the function) # to the second subterm(the argument). # # A sequence of more than two terms is interpreted left-associatively, # so f(x)(y)(z) means ((f(x))(y))(z). # # You can think of , as being replaced by )( so that f(x,y,z) # becomes f(x)(y)(z) and for that matter (f,x,y,z) becomes # (f)(x)(y)(z). All of these could be written even more simply as f x y z. # # Parentheses are unnecessary except to group things, so (%x foo(x))(a) # could actually be written as just (%x foo x) a. # # How simplification works (roughly): # If the first term in a juxtaposition is a lambda-term, we can # simplify the sequence by applying it (the function) to its # argument. Thus to simplify (%x foo(x))(a), we can evaluate the # body foo(x) with x replaced by a, yielding foo(a). # # Subtle point: If the function body itself contains a dummy # variable a, the simplifier renames that variable to avoid # conflicts: (%x %a foo(x,a))(a) is treated as (%x %b foo(x,b))(a) # so that it simplifies not to %a foo(a,a) but rather to %b foo(a,b). # # Subtle point: The simplifier often has a choice about what part of # the expression to simplify first. The order doesn't matter for # purposes of this assignment, but for the record, # this simplifier uses the most general strategy, called normal-order # evaluation. This is also called call-by-name or top-down or # leftmost outermost evaluation. (You may also hear about lazy # or call-by-need evaluation, which is the same thing, only more # efficient.) For more information see, e.g., # http://www.csse.monash.edu.au/~lloyd/tildeFP/Lambda/Ch/01.Calc.html # http://wombat.doc.ic.ac.uk/foldoc/foldoc.cgi?call-by-name # # Implementation: # The implementation here is quick-and-dirty, or maybe # slow-and-dirty. Everything is done in terms of strings; we # retokenize and reparse a subexpression every time we consider it. # This is obviously inefficient -- it would be faster (and probably # safer) to maintain a tree representation, which could include the # information about spacing and parentheses and such. Even better # would be to do what real languages like Scheme do, and recursively # evaluate while maintaining a stack of bindings. # !!!TO DO: # - An exported prettyprinter for lambda terms. # - Add a tracing option so user can see what is going on. # There are some existing tracing lines commented out, but we really # want something that prints the entire string at every stage of # simplification, maybe with the hot area marked. (Print a marker # line between stages, using ====---____ or ///''',,, to indicate # what is being reduced to what.) # ---------------------------------------------------------------------- # DEFINITIONS OF TOKEN TYPES # ---------------------------------------------------------------------- my($varpat) = "[A-Za-z0-9'_]+"; # variables are alphanumeric strings; also allow ' and _ my($oppat) = "[^][(){}\x01\x02,%\\sA-Za-z0-9]+"; # infix operators are strings of non-alphanumerics, excepting reserved characters my($bindpat) = "(?:%|$varpat%)"; # binding elements look like variables plus %. Also used to allow $oppat% (e.g., "!%") but this led to an unintuitive interpretation of (%x x).%y y since .% was parsed as a bindpat. my($i) = 0; # indentation level for tracing # ---------------------------------------------------------------------- # MAIN FUNCTION # ---------------------------------------------------------------------- # Wrappers to call from outside. sub simplify { my($result) = simplify1($_[0]); # throw away outermost parens return $result; } # A further wrapper that makes errors non-fatal. If the input expression is # ill-formed, we just return undef and leave the error message in $@ for # the caller to inspect. sub simplify_safe { eval { simplify($_[0]) }; # eval traps any exception that gets raised } # ---------------------------------------------------------------------- # MAIN FUNCTION # ---------------------------------------------------------------------- # Just a wrapper around simp, to do tracing. sub simplify1 { # print STDERR (" " x ($i+=2)), "simplify: $_[0]\n"; my(@resultlist) = simp($_[0]); # print STDERR " "x$i, "|simplified $_[0] to $resultlist[0]\n" unless $resultlist[0] eq $_[0]; i -= 2; return @resultlist; } # Simplifies a lambda expression: input and output are both strings. # # The output string is not surrounded by parentheses. This means any # initial % is conveniently exposed for further application. But it # also means the calling function (another instance of simp) will # usually have to put parentheses around it before juxtaposing it with # another term. So we also return two other values: $prec helps the # caller decide WHETHER to parenthesize, while @parens tells the caller # HOW to parenthesize (via restoreparens). $prec indicates the # precedence of the output's top-level operator. If that operator is # a binder, which takes very low precedence, we return 0. If it is an # infix operator, we return 1. If it is concatenation, we return 2. # Finally, if all we have is a single term or operator, we return 3. # # The effect is that in an expression like f[x](y,z), the square # brackets and the comma are a convention attached to f, and stick # around no matter what values are substituted in for x,y,z. # (However, if something is substituted for f and swallows up those # arguments, their brackets disappear.) sub simp { # First, split argument into a top-level sequence of expressions. # It would ordinarily suffice to read them in one at a time, but we # need to get all of them to see whether there are any infix # operators. # # Note that if any of the expressions starts with a parenthesis, it # must end with a *matching* parenthesis, or else it would have been # split further into two or more expressions. So we can use # removeparens to remove the parentheses from the outside. my(@exprs) = parsetop($_[0]); # split argument # Now decide what to do based on whether we have 0, 1, or more expressions # in our sequence. if (@exprs==0) { die "simplify: empty expression or subexpression\n"; } elsif (@exprs==1) { if ($exprs[0] =~ /^\s*[[({\x01]/) { # The single expression is parenthesized itself, so remove the # parentheses and recurse to get both the simplified expression # and its precedence. my(@parens) = removeparens($exprs[0]); # destructive my($result,$prec) = simplify1($exprs[0]); # throw away inner parens return ($result, $prec, @parens); # return outer parens } elsif ($exprs[0] =~ /^\s*$bindpat\s*$varpat\s*/o) { # the terminal spaces \s* should actually never show up; following whitespace belongs to the wrapping of the next argument or ). # The expression has the form "binder variable body". # If the binder is %, it could be argued that we needn't # simplify the body, but it makes for prettier output. # If the binder is something else (like an existential quantifier), # then simplifying the body is really necessary, just as # if the binder were a function. my($binding,$body) = ($&, $'); # binding = binder + variable; spacing = space between binding and body ($body,my $prec,my @parens) = simplify1($body); # throw away $prec restoreparens($body, @parens, $prec >= 0); # $prec >= 0 says we never need these parens, but if the user thought they looked pretty we'll keep them glue($binding,$body); return ($binding, 0, "", ""); # no particular parentheses recommended for this sequence; restoreparens can make some up } elsif ($exprs[0] =~ /^\s*(?:$varpat|$oppat)\s*$/o) { # the terminal spaces \s* should actually never show up; following whitespace belongs to the wrapping of the next argument or ). my(@parens) = removeparens($exprs[0]); # in this case, just removes surrounding whitespace. return ($exprs[0], 3, @parens); # Just an ordinary token of some sort: can't simplify it any further. } else { die "simplify: internal error: unknown form $exprs[0]]"; } } elsif (grep(/^\s*$oppat\s*$/o, @exprs)) { # At least one of the @exprs is an unparenthesized infix operator. # Infix operators have lower precedence than concatenation # (although higher precedence than binders). So we'll simplify # each concatenation of expressions between infix operators. my $result = ""; # simplified terms accumulate here my $acc = ""; # unsimplified terms accumulate here push(@exprs,""); # add sentinel while (@exprs) { my $expr = shift(@exprs); unless ($expr eq "" || $expr =~ /^\s*$oppat\s*$/o) { # an ordinary expression glue($acc,$expr); } else { # hitting an infix operator, or end of sequence, makes it time to reduce @acc if ($acc ne "") { ($acc, my $prec, my @parens) = simplify1($acc); restoreparens($acc, @parens, # restore parentheses, forcing some kind of parentheses unless it has higher precedence than our current infix thing or is a final binding term (in which case binder is unambiguously a prefix operator) $prec > 1 || ($prec==0 && @exprs==0)); glue($result, $acc); $acc = ""; # reset } glue($result, $expr); } } return ($result, 1, "", ""); # no particular parentheses recommended for this sequence; restoreparens can make some up } else { # We have an ordinary sequence of 2 or more expressions: apply the first to # the rest, to the extent we can. # Apply $result (the result of recursively simplifying the first # expression) to the next expression, by substitution plus # recursive simplification. Apply the result of that to the next # expression, and so on until we either run out of expressions # or we get something that we don't know how to apply (i.e., a # constant symbol representing a function). my $result = shift(@exprs); # result so far my($prec, @parens); # stuff returned along with that result $result =~ s/^\s*//; my $initspace = $&; # save initial spacing, which is retained in an application (whereas the parens come from the body of the lambda term) while (1) { ($result,$prec,@parens) = simplify1($result); # exposes any leading %, if this works out to a function that we can apply further # print STDERR " "x$i, ":result is now $result with remaining args ",join(",",@exprs),"\n"; last unless (@exprs && $result =~ /^\s*($bindpat)\s*($varpat)\s*/o); # can we go further? # $result has the form %x (or E% x) and there's something to apply it to. my($binder, $var, $body) = ($1,$2,$'); die "simplify: a $binder-term is not a function and cannot be applied to an argument: $result\n" unless $binder eq "%"; $result = (replace($body,$var,delparens(shift(@exprs)))); # delparens is more than cosmetic because the expression may include a final comma, which would not be legal in a substitution # Note that we are throwing away the parens from both applier # and applyee in favor of the ones from (the post-replacement # version of) $body. On the next iteration, $body's parens will # overwrite the applier's, and notice that we explictly deleted # the applyee's (although it will be surrounded by () if # necessary). } $parens[0] = $initspace.$parens[0]; if (@exprs==0) { return ($result, $prec, @parens); } else { # We have to now apply $result to the remaining @exprs, which we don't # know how to do any further, so we must return the application unevaluated. # We do simplify each of the remaining @exprs (though it could be # argued that we shouldn't: consider "runsforever((%f f f)(%f f f))" # vs. compare "(%x halts)((%f f f)(%f f f))"). restoreparens($result,@parens,$prec >= 2); # reparenthesize, forcing some kind of parentheses unless it has at least as high precedence as this concatenation (equal precedence is ok since concat is left-associative and we're first) while (@exprs) { my($expr, $prec, @parens) = simplify1(shift(@exprs)); restoreparens($expr, @parens, # restore parentheses exactly as in infix case (but with different precedence cutoff): force some kind of parentheses unless it has higher precedence than our current concatenation or is a final binding term (in which case binder is unambiguously a prefix operator) $prec > 2 || ($prec==0 && @exprs==0)); glue($result, $expr); } return ($result,2,"",""); # no particular parentheses recommended for this sequence; restoreparens can make some up } } } # ---------------------------------------------------------------------- # PARENTHESIS HANDLING # ---------------------------------------------------------------------- # Destructively remove an outer layer of parentheses if any, along # with the spacing outside and inside them. Return the removed # wrapping, which can be restored later after the inner string is # simplified. This function assumes that if the string starts with a # parenthesis it ends with a *matching* parenthesis: don't give it # "(x)(y)" since it would merrily return "x)(y"! sub removeparens { my($pre,$post); if ($_[0] =~ s/^\s*[[({\x01]\s*//) { $pre = $&; die "simplify: internal error $pre,$_." unless $_[0] =~ s/\s*,?[])}\x02]\s*$//; $post = $&; } else { $_[0] =~ s/^\s*//; $pre = $&; $_[0] =~ s/\s*$//; $post = $&; } return ($pre,$post); } # Same thing, but throw away the wrapper and return the new value. sub delparens { my($e) = @_; removeparens($e); $e; } # Destructively wrap parentheses back around an unparenthesized string. # # $noforce is true if the need for parentheses is merely cosmetic. If # not cosmetic, we have to make sure () gets added if the supplied # parentheses are whitespace or the "as needed" parens \x01 and \x02. # If just cosmetic, we can stick with whitespace and delete \x01 and # \x02. sub restoreparens { my($e,$pre,$post,$noforce) = @_; if ($noforce) { $pre =~ s/\x01//; $post =~ s/\x02//; } else { $pre =~ s/\x01/\(/; $post =~ s/\x02/\)/; $e = "($e)" if $pre =~ /^\s*$/; } $_[0] = $pre.$e.$post; } # ---------------------------------------------------------------------- # WORKHORSE ROUTINE: does all of the interesting work. # ---------------------------------------------------------------------- # Remove a single expression from the beginning of $_ and write a munged # version of it onto the end of $Eaten. We read as little as possible, # i.e., if $_ consists of a sequence of expressions we just read one. # # This function reads or modifies various variables in its caller's environment, # which are capitalized to indicate that they have dynamic scope. # We modify $_ and $Eaten as just described. # We replace free occurrences of $Var with $Arg, which should already be # parenthesized. (To turn this off, define $Var to be something like "" # that won't match any variable.) # $Env{x} records how many instances of %x have scope over us. This is # information we get from the caller: we need it to determine whether x is # bound or free in this expression. We may increase it temporarily on # recursive calls but will change it back before we return. # We set $Free{x}=1 if we have encountered x free, i.e., as a constant. # This is information that we provide back to the caller. Actually # we only do this if %Free is nonempty. # We rename variable x to $Newname{x}. (Note that we only rename dummy # variables, not constants, whose names are fixed.) The first time we # encounter x as a variable, we look at $Conflicts{x} to see whether x # has to be renamed, and accordingly set $Newname{x} to either x or # something else. We do not rename x when it is free, i.e., a constant # rather than a dummy variable. # (Note: We could merge %Env with %Newname by having $Env{x} be a # stack of newnames, but we may as well use a single $Newname{x} # consistently to rename even occurrences of x that have nothing to # do with one another.) # # We handle a comma-separated arglist (x , y , z) by reading off # "(x ,)" and leaving behind " (y , z)". On the next two calls we would # read " (y ,)" and " (z)". When parseall reads these three # arguments, they are glued onto the end of $Eaten using glue, which # reconstitutes the original list perfectly. Similarly, when simplify reads # these three via parsetop, it will try to use them up via application, but it # will glue simplified versions of any unused ones back together: for # example, in (%a a)(x,y,z), simplify applies (%a a) to (x,) but doesn't # know how to apply the resulting x to (y,) and (z), so it just # glues them together to get x(y,z). sub parseone { # print STDERR (" " x ($i+=2)), "parseone: $_\n"; # Note: should probably use \Q...\E construction to quote patterns. die "simplify: unexpected right parenthesis at end of \"$Eaten\" before \"$_\"\n" if eat("[])}\x02,]"); die "simplify: internal error, no expressions to be found" if eat("\$"); if (my $left = eat("[([{\x01]")) { my $right = $left; $right =~ tr/([{\x01/)]}\x02/; # matching right paren char we'll look for &parseall; die "simplify: missing $right after \"$Eaten\" just before \"$_\"\n" unless eat("[$right,]"); # If we closed with a comma, either complete it with $right (e.g., if the # expression we were given to parse was already something like "(x,)") or else # pretend we saw $right and match it with a $left to start the next thing # (e.g., if the expression was "(x, y, z)"). eat("\\$right") || ($Eaten .= $right, s/^\s*/$&$left/) if $Eaten =~ /,$/; } elsif (eat($bindpat)) { # We have a binder: let's get the dummy variable. die "simplify: expected variable following binder just before \"$_\"\n" unless eat("") && s/^$varpat//o; my($foundvar) = $&; # How should we rename the variable, here and any other time we # see it non-free? (Our caller will also use this new name if the # variable appears elsewhere, since our entry in %Newname will persist # after we return. This is okay, and just means that unrelated instances of # %x will both be renamed the same way. We could actually avoid this by # making $Newname{x} be a stack (in fact, the height of the stack would # play the role of $Env{x}), but why bother?) unless (defined $Newname{$foundvar}) { # if we haven't already chosen a renaming $Newname{$foundvar} = $foundvar; # first try to rename to itself while ($Conflicts{$Newname{$foundvar}}) { # Pick a new name to try. Here we use Perl's magic increment, # which should work pretty well with alphabetic variables (it turns # x -> y -> z -> aa -> ab ...) and not too badly with meaningful # variable names (e.g., subject -> subjecu -> subjecv ...) # An alternative would be to add ' to the current name, or to # maintain a number at the end of the name. $Newname{$foundvar}++; } # Variables that don't have newnames yet (we haven't # seen them before) are not allowed to conflict with the name # we just picked -- they might initially have had that name, or we # might try renaming them to that name, but either way we # shouldn't allow it, in case we take scope over them. $Conflicts{$Newname{$foundvar}} = 1; } $Eaten .= $Newname{$foundvar}; # Ok, now that we know how to rename $foundvar when we see # it in the body, let's parse the body. $Env{$foundvar}++; # increment number of copies of this variable with scope over us &parseall; # read as much body as we possibly can $Env{$foundvar}--; # pop out of that scope delete $Env{$foundvar} if $Env{$foundvar}==0; # clean up after ourselves (not really necessary) } elsif (eat("$oppat")) { # infix operator. # nothing else to do } elsif (eat("") && s/^$varpat//o) { # a variable or constant my($foundvar)=$&; if ($Env{$foundvar}) { # bound variable - we should know how to rename it die "simplify: internal error" unless defined $Newname{$foundvar}; $Eaten .= $Newname{$foundvar}; } else { # free variable - we may have to substitute for it # print STDERR " " x $i, "free var: $foundvar; remaining $_\n"; $Free{$foundvar}=1 if %Free; $Eaten .= ($foundvar eq $Var) ? $Arg : $foundvar; # replace $Var by $Arg, using parens conservatively for safety; leave other variables alone } } else { die "simplify: internal error: don't know how to continue at \"$_\""; } # print STDERR " "x$i, "|\n"; $i-=2; } # Called when parseone recurses. sub parseall { # print STDERR (" " x ($i+=2)), "parseall: $_\n"; &parseone until /^\s*($|[])}\x02,])/; # read expressions until we hit something that would make parseone crash (end of string or right paren); compare replace, parsetop # print STDERR " "x$i, "|\n"; $i-=2; } # ---------------------------------------------------------------------- # ROUTINES THAT USE PARSING # ---------------------------------------------------------------------- # Breaks string into a list of top-level expressions. # Note: A top-level expression may start with spaces but does not end with them. # # As a side effect, if %Free is nonempty, sets $Free{v}=1 for each free # variable v in the string. # # See parseone for how commas are handled. sub parsetop { local($_) = @_; local $Var = ""; # makes sure that when we scan $_, we do no replacements local %Env = (); # keep track of bound vars local %Conflicts = (); # don't care about this local %Newname = (); # don't care about this my @exprs = (); until (/^\s*$/) { # read expressions till we reach end of line; compare replace, parseall local $Eaten = ""; &parseone; # get an expression push(@exprs,$Eaten); } return @exprs; } # Get list of free vars out of a lambda expression. sub freevars { # print STDERR (" " x ($i+=2)), "freevars: $_\n"; local %Free = ("",""); # making this nonempty causes parsetop to put entries in it as a side effect (not enough to set it to (), yuck) parsetop($_[0]); delete $Free{""}; # print STDERR " "x$i, "|found free vars ", join(" ",keys %Free), "\n"; $i-=2; return keys %Free; } # Return a version of $_ in which free occurrences of $Var have been # replaced by $Arg (which is passed unparenthesized by caller but is # parenthesized between \x01 and \x02 on replacement; these special # symbols are later deleted if possible or else replaced with ordinary # parens). # # Also, variables in $_ are renamed as necessary to avoid capturing # the free variables of $Arg. (The new names must also avoid # capturing free variables of either $f or $Arg -- i.e., (%x %e # f(x,e)) (e(g)) must rename e, but mustn't rename it to f or g! -- # and they must not conflict with one another, either.) # # The renaming is aggressive - if foo appears free in $Arg or $_, we # rename all its non-free instances in $_, even ones that don't # take scope over $Var (including any non-free instances of $Var itself!). # In other words, we may do some unnecessary renaming. sub replace { local($_,$Var,$Arg) = @_; # $Var tells parse what to replace # print STDERR (" " x ($i+=2)), "replace $Var with $Arg in $_\n"; $Arg =~ s/^\s*//; # kill initial whitespace in replacement. (Should have no final whitespace.) $Arg = "\x01$Arg\x02"; # protect with temporary parentheses # Figure out which variables we must rename. local %Conflicts; # records variables that are free in arg { local %Free = ("",""); # making this nonempty causes parsetop to put entries in it as a side effect (not enough to set it to (), yuck) parsetop($Arg); # throw away result, but figure out which vars are free parsetop($_); # and which vars are free in $_ as well %Conflicts = (%Free); } # Okay, go ahead and process $_. local %Newname = (); # renaming table to avoid conflicts local %Env = (); # keep track of bound variables local $Eaten = ""; &parseone until eat("\$"); # read expressions till we reach end of $_; compare parsetop, parseall # print STDERR " "x$i, "|replaced to $Eaten\n"; $i-=2; return $Eaten; } # ---------------------------------------------------------------------- # LOW-LEVEL ROUTINES FOR SCANNING INPUT # ---------------------------------------------------------------------- # If $pattern matches at beginning of $_ (perhaps with preceding # whitespace), remove it from $_ and glue it to end of $Eaten. # Return whether we got such a match. (Actually return the match # string itself (without whitespace) for the caller's inspection, # unless it has a false value.) # # Note that eat("") eats leading whitespace from $_. sub eat { my($pattern)=@_; if (s/^\s*($pattern)//) { my($match) = $1; glue($Eaten, $&); return $match ? $match : 1; } else { return ""; } } # Concatenate $y onto $x (destructively), fixing up commas (see parseone for # details) and spaces. sub glue { my($x, $y) = @_; # If $x ends in comma followed by close paren, delete the close paren and # also the open paren that must start $y. $y =~ s/^(\s*)[[({\x01]/$1/ || die "simplify: internal error" if $x =~ s/,[])}\x02]$/,/; # Insert whitespace if $x and $y are in danger of glomming tokens together. # I'm not sure whether this ever happens in practice. $x .= " ", warn "added space to combine $x and $y." if $x =~ /$varpat$/o && $y =~ /^$varpat/o || $x =~ /$oppat$/o && $y =~ /^$oppat/o; $_[0] = $x.$y; } 1; # signal that this module was loaded successfully