# Read the words listed in the file "words" and write their stems,
# sorted and de-duplicated, to the file "mystems".
# Stemming is performed with a transducer called Stemmer,
# which must already be defined.

# Compile the lines of the "words" file into a (minimized) FSA.
# `read text` leaves that network on top of the stack, and `define`
# with no expression pops it and binds it to the name "Dictionary".
read text < words
define Dictionary

# Push every Dictionary word through Stemmer in parallel: compose the
# two networks, keep only the lower (output) side of the result, and
# bind that language to the name "Stems".
regex Dictionary .o. Stemmer;
lower-side net
define Stems

# Output and sort the results.
# Original version looked like this:
#     print words Stems > mystems0
#     system sort mystems0 | uniq > mystems
#
# But the academic license for the latest version of xfst
# only lets you print 5000 words at once, so we work around
# that by printing in batches keyed on the first letter of each stem:
#   batches 1-7  stems starting with a-c, d-f, ..., s-u
#   batch 8      every remaining stem (v-z, any other first symbol,
#                and the empty string if it is in Stems)
# The batches are disjoint and together cover all of Stems.
# NOTE: a single batch holding more than 5000 stems would run into the
# same limit; if that happens, split the batch further.
#
# Each `regex` pushes a new network onto the stack and `print words`
# prints the top network, so we `pop stack` after every batch to leave
# the stack as we found it instead of piling up eight temporary networks.

regex Stems & [a|b|c] ?*;
print words > mystems1
pop stack

regex Stems & [d|e|f] ?*;
print words > mystems2
pop stack

regex Stems & [g|h|i] ?*;
print words > mystems3
pop stack

regex Stems & [j|k|l] ?*;
print words > mystems4
pop stack

regex Stems & [m|n|o] ?*;
print words > mystems5
pop stack

regex Stems & [p|q|r] ?*;
print words > mystems6
pop stack

regex Stems & [s|t|u] ?*;
print words > mystems7
pop stack

regex Stems - [a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u] ?*;
print words > mystems8
pop stack

# Merge the batch files into one sorted, duplicate-free list, then
# remove the temporary batch files.
system sort mystems[1-8] | uniq > mystems
system rm mystems[1-8]