;;; stem.el ---- routines for stemming
;;; $Id: stem-english.el,v 1.1.1.1 1999/04/03 03:02:56 satoru-lookup Exp $

;;; Author: Tsuchiya Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
;;; Keywords: stemming

;;; Commentary:

;; $BO@J8!X(BAn algorithm for suffix stripping (M.F.Porter)$B!Y$K5-=R$5$l$F(B
;; $B$$$k%"%k%4%j%:%`$K4p$E$$$F!"1QC18l$N8lHx$r<h$j=|$/$?$a$N%i%$%V%i%j!#(B
;; $BMxMQ5Z$S:FG[I[$N:]$O!"(BGNU $B0lHL8xMQ5vBz=q$NE,Ev$J%P!<%8%g%s$K$7$?$,$C(B
;; $B$F2<$5$$!#(B

;; $B0l<!G[I[85(B
;;    http://www-nagao.kuee.kyoto-u.ac.jp/member/tsuchiya/elisp/xdic.html


;; -*- Emacs-Lisp -*-

(provide 'stem)

(defvar stem:minimum-word-length 4 "Porter $B$N%"%k%4%j%:%`$,E,MQ$G$-$k:G>.8lD9(B")


;;;============================================================
;;;	$BHs8x3+4X?t(B
;;;============================================================

;; $BF0:nB.EY$r8~>e$5$;$k$?$a$K!"4X?tFbIt$G30ItJQ?t$r$$$8$C$F$$$k(B
;; $B4X?t$,$"$j!"M=4|$7$J$$I{:nMQ$,H/@8$9$k2DG=@-$,9b$$!#=>$C$F!"(B
;; $BHs8x3+4X?t$rD>@\8F$S=P$9$3$H$OHr$1$k$3$H!#(B

;;------------------------------------------------------------
;;	stemming-rule $B$N>r7o@a$r5-=R$9$k4X?t72(B
;;------------------------------------------------------------

(defsubst stem:match (arg) "\
$BJQ?t(B str $B$r8!::$9$kHs8x3+4X?t(B ($B8l44$NItJ,$rJQ?t(B stem $B$KBeF~$9$k(B)"
  (and
   (string-match arg str)
   (setq stem (substring str 0 (match-beginning 0)))))

(defsubst stem:m () "\
$BJQ?t(B stem $B$K4^$^$l$F$$$k(B VC $B$N?t$r5a$a$kHs8x3+4X?t(B"
  (save-match-data
    (let ((pos 0)(m 0))
      (while (string-match "\\(a\\|e\\|i\\|o\\|u\\|[^aeiou]y+\\)[aeiou]*" stem pos)
	(setq m (1+ m))
	(setq pos (match-end 0)))
      (if (= pos (length stem)) (1- m) m))))

(defsubst stem:m> (i) "\
$BJQ?t(B stem $B$K4^$^$l$F$$$k(B VC $B$N?t$N>r7o$r5-=R$9$kHs8x3+4X?t(B"
  (< i (stem:m)))

(defsubst stem:m= (i) "\
$BJQ?t(B stem $B$K4^$^$l$F$$$k(B VC $B$N?t$N>r7o$r5-=R$9$kHs8x3+4X?t(B"
  (= i (stem:m)))

(defsubst stem:*v* () "\
$BJQ?t(B stem $B$,Jl2;$r4^$s$G$$$k$+8!::$9$k4X?t(B"
  (save-match-data
    (if (string-match "\\(a\\|e\\|i\\|o\\|u\\|[^aeiou]y\\)" stem) t)))

(defsubst stem:*o () "\
$BJQ?t(B stem $B$,(B cvc $B$N7A$G=*$C$F$$$k$+8!::$9$k4X?t(B"
  (save-match-data
    (if (string-match "[^aeiou][aeiouy][^aeiouwxy]$" stem) t)))



;;------------------------------------------------------------
;;	stemming-rule $B$r5-=R$7$?4X?t72(B
;;------------------------------------------------------------

(defun stem:step1a (str) "$BBh(B1a$BCJ3,$N(B stemming rule ($BHs8x3+4X?t(B)"
  (let ((s)(stem))
    (if (setq s (cond
		 ((stem:match "sses$") "ss")
		 ((stem:match "ies$")  "i")
		 ((stem:match "ss$")   "ss")
		 ((stem:match "s$")    "")))
	(concat stem s)
      str)))


(defun stem:step1b (str) "$BBh(B1b$BCJ3,$N(B stemming rule ($BHs8x3+4X?t(B)"
  (let ((s)(stem))
    (cond
     ((and (stem:match "eed$") (stem:m> 0))
      (concat stem "ee"))
     ((or (and (not stem) (stem:match "ed$") (stem:*v*))
	  (and (stem:match "ing$") (stem:*v*)))
      (if (and (stem:m= 1) (stem:*o))
	  (concat stem "e")
	(setq str stem)
	(if (setq s (cond
		     ((stem:match "at$") "ate")
		     ((stem:match "bl$") "ble")
		     ((stem:match "iz$") "ize")
		     ((stem:match "\\([^lsz]\\)\\1$")
		      (substring str (match-beginning 1) (match-end 1)))))
	    (concat stem s)
	  str)))
     (t str))))


(defun stem:step1c (str) "$BBh(B1c$BCJ3,$N(B stemming rule ($BHs8x3+4X?t(B)"
  (let ((stem))
    (if (and (stem:match "y$")
	     (stem:*v*))
	(concat stem "i")
      str)))


(defun stem:step1 (str) "$BBh(B1$BCJ3,$N(B stemming rule ($BHs8x3+4X?t(B)"
  (stem:step1c
   (stem:step1b
    (stem:step1a str))))


(defun stem:step2 (str) "$BBh(B2$BCJ3,$N(B stemming rule ($BHs8x3+4X?t(B)"
  (let ((s)(stem))
    (if (and
	 (setq s (cond
		  ((stem:match "ational$") "ate")
		  ((stem:match "tional$")  "tion")
		  ((stem:match "enci$")    "ence")
		  ((stem:match "anci$")    "ance")
		  ((stem:match "izer$")    "ize")
		  ((stem:match "abli$")    "able")
		  ((stem:match "alli$")    "al")
		  ((stem:match "entli$")   "ent")
		  ((stem:match "eli$")     "e")
		  ((stem:match "ousli$")   "ous")
		  ((stem:match "ization$") "ize")
		  ((stem:match "ation$")   "ate")
		  ((stem:match "ator$")    "ate")
		  ((stem:match "alism$")   "al")
		  ((stem:match "iveness$") "ive")
		  ((stem:match "fulness$") "ful")
		  ((stem:match "ousness$") "ous")
		  ((stem:match "aliti$")   "al")
		  ((stem:match "iviti$")   "ive")
		  ((stem:match "biliti$")  "ble")))
	 (stem:m> 0))
	(concat stem s)
      str)))


(defun stem:step3 (str) "$BBh(B3$BCJ3,$N(B stemming rule ($BHs8x3+4X?t(B)"
  (let ((s)(stem))
    (if (and
	 (setq s (cond
		  ((stem:match "icate$") "ic")
		  ((stem:match "ative$") "")
		  ((stem:match "alize$") "al")
		  ((stem:match "iciti$") "ic")
		  ((stem:match "ical$")  "ic")
		  ((stem:match "ful$")   "")
		  ((stem:match "ness$")  "")))
	 (stem:m> 0))
	(concat stem s)
      str)))


(defun stem:step4 (str) "$BBh(B4$BCJ3,$N(B stemming rule ($BHs8x3+4X?t(B)"
  (let ((stem))
    (if (and (or
	      (stem:match "al$")
	      (stem:match "ance$")
	      (stem:match "ence$")
	      (stem:match "er$")
	      (stem:match "ic$")
	      (stem:match "able$")
	      (stem:match "ible$")
	      (stem:match "ant$")
	      (stem:match "ement$")
	      (stem:match "ment$")
	      (stem:match "ent$")
	      (and (string-match "[st]\\(ion\\)$" str)
		   (setq stem (substring str 0 (match-beginning 1))))
	      (stem:match "ou$")
	      (stem:match "ism$")
	      (stem:match "ate$")
	      (stem:match "iti$")
	      (stem:match "ous$")
	      (stem:match "ive$")
	      (stem:match "ize$"))
	     (stem:m> 1))
	stem str)))


(defun stem:step5 (str) "$BBh(B5$BCJ3,$N(B stemming rule ($BHs8x3+4X?t(B)"
  (let ((stem))
    (if (or
	 (and (stem:match "e$")
	      (or (stem:m> 1)
		  (and (stem:m= 1)
		       (not (stem:*o)))))
	 (and (stem:match "ll$")
	      (setq stem (concat stem "l"))
	      (stem:m> 1)))
	stem str)))


(defvar stem:irregular-verb-alist
  '(("abode" "abide")
    ("abided" "abide")
    ("alighted" "alight")
    ("arose" "arise")
    ("arisen" "arise")
    ("awoke" "awake")
    ("awaked" "awake")
    ("awoken" "awake")
    ("baby-sat" "baby-sit")
    ("backbit" "backbite")
    ("backbitten" "backbite")
    ("backslid" "backslide")
    ("backslidden" "backslide")
    ("was" "be" "am" "is" "are")
    ("were" "be" "am" "is" "are")
    ("been" "be" "am" "is" "are")
    ("bore" "bear")
    ("bare" "bear")
    ("borne" "bear")
    ("born" "bear")
    ("beat" "beat")
    ("beaten" "beat")
    ("befell" "befall")
    ("befallen" "befall")
    ("begot" "beget")
    ("begat" "beget")
    ("begotten" "beget")
    ("began" "begin")
    ("begun" "begin")
    ("begirt" "begird")
    ("begirded" "begird")
    ("beheld" "behold")
    ("bent" "bend")
    ("bended" "bend")
    ("bereaved" "bereave")
    ("bereft" "bereave")
    ("besought" "beseech")
    ("beseeched" "beseech")
    ("beset" "beset")
    ("bespoke" "bespeak")
    ("bespoken" "bespeak")
    ("bestrewed" "bestrew")
    ("bestrewn" "bestrew")
    ("bestrode" "bestride")
    ("bestrid" "bestride")
    ("bestridden" "bestride")
    ("bet" "bet")
    ("betted" "bet")
    ("betook" "betake")
    ("betaken" "betake")
    ("bethought" "bethink")
    ("bade" "bid")
    ("bid" "bid")
    ("bad" "bid")
    ("bedden" "bid")
    ("bided" "bide")
    ("bode" "bide")
    ("bound" "bind")
    ("bit" "bite")
    ("bitten" "bite")
    ("bled" "bleed")
    ("blended" "blend")
    ("blent" "blend")
    ("blessed" "bless")
    ("blest" "bless")
    ("blew" "blow")
    ("blown" "blow")
    ("blowed" "blow")
    ("bottle-fed" "bottle-feed")
    ("broke" "break")
    ("broken" "break")
    ("breast-fed" "breast-feed")
    ("bred" "breed")
    ("brought" "bring")
    ("broadcast" "broadcast")
    ("broadcasted" "broadcast")
    ("browbeat" "browbeat")
    ("browbeaten" "browbeat")
    ("built" "build")
    ("builded" "build")
    ("burned" "burn")
    ("burnt" "burn")
    ("burst" "burst")
    ("busted" "bust")
    ("bust" "bust")
    ("bought" "buy")
    ("cast" "cast")
    ("chid" "chide")
    ("chided" "chide")
    ("chidden" "chide")
    ("chose" "choose")
    ("chosen" "choose")
    ("clove" "cleave")
    ("cleft" "cleave")
    ("cleaved" "cleave")
    ("cloven" "cleave")
    ("clave" "cleave")
    ("clung" "cling")
    ("clothed" "clothe")
    ("clad" "clothe")
    ("colorcast" "colorcast")
    ("clorcasted" "colorcast")
    ("came" "come")
    ("come" "come")
    ("cost" "cost")
    ("costed" "cost")
    ("countersank" "countersink")
    ("countersunk" "countersink")
    ("crept" "creep")
    ("crossbred" "crossbreed")
    ("crowed" "crow")
    ("crew" "crow")
    ("cursed" "curse")
    ("curst" "curse")
    ("cut" "cut")
    ("dared" "dare")
    ("durst" "dare")
    ("dealt" "deal")
    ("deep-froze" "deep-freeze")
    ("deep-freezed" "deep-freeze")
    ("deep-frozen" "deep-freeze")
    ("dug" "dig")
    ("digged" "dig")
    ("dived" "dive")
    ("dove" "dive")
    ("did" "do")
    ("done" "do")
    ("drew" "draw")
    ("drawn" "draw")
    ("dreamed" "dream")
    ("dreamt" "dream")
    ("drank" "drink")
    ("drunk" "drink")
    ("dripped" "drip")
    ("dript" "drip")
    ("drove" "drive")
    ("drave" "drive")
    ("driven" "drive")
    ("dropped" "drop")
    ("dropt" "drop")
    ("dwelt" "dwell")
    ("dwelled" "dwell")
    ("ate" "eat")
    ("eaten" "eat")
    ("fell" "fall")
    ("fallen" "fall")
    ("fed" "feed")
    ("felt" "feel")
    ("fought" "fight")
    ("found" "find")
    ("fled" "fly" "flee")
    ("flung" "fling")
    ("flew" "fly")
    ("flied" "fly")
    ("flown" "fly")
    ("forbore" "forbear")
    ("forborne" "forbear")
    ("forbade" "forbid")
    ("forbad" "forbid")
    ("forbidden" "forbid")
    ("forecast" "forecast")
    ("forecasted" "forecast")
    ("forewent" "forego")
    ("foregone" "forego")
    ("foreknew" "foreknow")
    ("foreknown" "foreknow")
    ("foreran" "forerun")
    ("forerun" "forerun")
    ("foresaw" "foresee")
    ("foreseen" "foresee")
    ("foreshowed" "foreshow")
    ("foreshown" "foreshow")
    ("foretold" "foretell")
    ("forgot" "forget")
    ("forgotten" "forget")
    ("forgave" "forgive")
    ("forgiven" "forgive")
    ("forwent" "forgo")
    ("forgone" "forgo")
    ("forsook" "forsake")
    ("forsaken" "forsake")
    ("forswore" "forswear")
    ("forsworn" "forswear")
    ("froze" "freeze")
    ("frozen" "freeze")
    ("gainsaid" "gainsay")
    ("gelded" "geld")
    ("gelt" "geld")
    ("got" "get")
    ("gotten" "get")
    ("ghostwrote" "ghostwrite")
    ("ghostwritten" "ghostwrite")
    ("gilded" "gild")
    ("gilt" "gild")
    ("girded" "gird")
    ("girt" "gird")
    ("gave" "give")
    ("given" "give")
    ("gnawed" "gnaw")
    ("gnawn" "gnaw")
    ("went" "wend" "go")
    ("gone" "go")
    ("graved" "grave")
    ("graven" "grave")
    ("ground" "grind")
    ("gripped" "grip")
    ("gript" "grip")
    ("grew" "grow")
    ("grown" "grow")
    ("hamstrung" "hamstring")
    ("hamstringed" "hamstring")
    ("hung" "hang")
    ("hanged" "hang")
    ("had" "have")
    ("heard" "hear")
    ("heaved" "heave")
    ("hove" "heave")
    ("hewed" "hew")
    ("hewn" "hew")
    ("hid" "hide")
    ("hidden" "hide")
    ("hit" "hit")
    ("held" "hold")
    ("hurt" "hurt")
    ("indwelt" "indwell")
    ("inlaid" "inlay")
    ("inlet" "inlet")
    ("inputted" "input")
    ("input" "input")
    ("inset" "inset")
    ("insetted" "inset")
    ("interwove" "interweave")
    ("interweaved" "interweave")
    ("jigsawed" "jigsaw")
    ("jigsawn" "jigsaw")
    ("kept" "keep")
    ("knelt" "kneel")
    ("kneeled" "kneel")
    ("knitted" "knit")
    ("knit" "knit")
    ("knew" "know")
    ("known" "know")
    ("laded" "lade")
    ("laden" "lade")
    ("laid" "lay")
    ("led" "lead")
    ("leaned" "lean")
    ("leant" "lean")
    ("leaped" "leap")
    ("leapt" "leap")
    ("learned" "learn")
    ("learnt" "learn")
    ("left" "leave")
    ("lent" "lend")
    ("let" "let")
    ("lay" "lie")
    ("lain" "lie")
    ("lighted" "light")
    ("lit" "light")
    ("lip-read" "lip-read")
    ("lost" "lose")
    ("made" "make")
    ("meant" "mean")
    ("met" "meet")
    ("melted" "melt")
    ("methougt" "methinks")
    ;; ("-" "methinks")
    ("misbecame" "misbecome")
    ("misbecome" "misbecome")
    ("miscast" "miscast")
    ("miscasted" "miscast")
    ("misdealt" "misdeal")
    ("misdid" "misdo")
    ("misdone" "misdo")
    ("misgave" "misgive")
    ("misgiven" "misgive")
    ("mishit" "mishit")
    ("mislaid" "mislay")
    ("misled" "mislead")
    ("misread" "misread")
    ("misspelt" "misspell")
    ("missplled" "misspell")
    ("misspent" "misspend")
    ("mistook" "mistake")
    ("mistaken" "mistake")
    ("misunderstood" "misunderstand")
    ("mowed" "mow")
    ("mown" "mow")
    ("offset" "offset")
    ("outbid" "outbid")
    ("outbade" "outbid")
    ("outbidden" "outbid")
    ("outdid" "outdo")
    ("outdone" "outdo")
    ("outfought" "outfight")
    ("outgrew" "outgrown")
    ("outgrown" "outgrown")
    ("outlaid" "outlay")
    ("output" "output")
    ("outputted" "output")
    ("ooutputted" "output")
    ("outrode" "outride")
    ("outridden" "outride")
    ("outran" "outrun")
    ("outrun" "outrun")
    ("outsold" "outsell")
    ("outshone" "outshine")
    ("outshot" "outshoot")
    ("outwore" "outwear")
    ("outworn" "outwear")
    ("overbore" "overbear")
    ("overborne" "overbear")
    ("overbid" "overbid")
    ("overblew" "overblow")
    ("overblown" "overblow")
    ("overcame" "overcome")
    ("overcome" "overcome")
    ("overdid" "overdo")
    ("overdone" "overdo")
    ("overdrew" "overdraw")
    ("overdrawn" "overdraw")
    ("overdrank" "overdrink")
    ("overdrunk" "overdrink")
    ("overate" "overeat")
    ("overeaten" "overeat")
    ("overfed" "overfeed")
    ("overflowed" "overflow")
    ("overflown" "overfly" "overflow")
    ("overflew" "overfly")
    ("overgrew" "overgrow")
    ("overgrown" "overgrow")
    ("overhung" "overhang")
    ("overhanged" "overhang")
    ("ovearheard" "overhear")
    ("overlaid" "overlay")
    ("overleaped" "overleap")
    ("overleapt" "overleap")
    ("overlay" "overlie")
    ("overlain" "overlie")
    ("overpaid" "overpay")
    ("overrode" "override")
    ("overridden" "override")
    ("overran" "overrun")
    ("overrun" "overrun")
    ("oversaw" "oversee")
    ("overseen" "oversee")
    ("oversold" "oversell")
    ("overset" "overset")
    ("overshot" "overshoot")
    ("overspent" "overspend")
    ("overspread" "overspread")
    ("overtook" "overtake")
    ("overtaken" "overtake")
    ("overthrew" "overthrow")
    ("overthrown" "overthrow")
    ("overworked" "overwork")
    ("overwrought" "overwork")
    ("partook" "partake")
    ("partaken" "partake")
    ("paid" "pay")
    ("penned" "pen")
    ("pent" "pen")
    ("pinch-hit" "pinch-hit")
    ("pleaded" "plead")
    ("plead" "plead")
    ("pled" "plead")
    ("prepaid" "prepay")
    ("preset" "preset")
    ("proofread" "proofread")
    ("proved" "prove")
    ("proven" "prove")
    ("put" "put")
    ("quick-froze" "quick-freeze")
    ("quick-frozen" "quick-freeze")
    ("quit" "quit")
    ("quitted" "quit")
    ("read" "read")
    ("reaved" "reave")
    ("reft" "reave")
    ("rebound" "rebind")
    ("rebroadcast" "rebroadcast")
    ("rebroadcasted" "rebroadcast")
    ("rebuilt" "rebuild")
    ("recast" "recast")
    ("recasted" "recast")
    ("re-did" "re-do")
    ("re-done" "re-do")
    ("reeved" "reeve")
    ("rove" "reeve")
    ("reheard" "rehear")
    ("relaid" "relay")
    ("remade" "remake")
    ("rent" "rend")
    ("repaid" "repay")
    ("reread" "reread")
    ("reran" "rerun")
    ("rerun" "rerun")
    ("resold" "resell")
    ("reset" "reset")
    ("retook" "retake")
    ("retaken" "retake")
    ("retold" "retell")
    ("rethought" "rethink")
    ("rewound" "rewind")
    ("rewinded" "rewind")
    ("rewrote" "rewrite")
    ("rewritten" "rewrite")
    ("rid" "ride") ;; ("rid" "ride" "rid")
    ("ridded" "rid")
    ("rode" "ride")
    ("ridden" "ride")
    ("rang" "ring")
    ("rung" "ring")
    ("rose" "rise")
    ("risen" "rise")
    ("rived" "rive")
    ("riven" "rive")
    ("roughcast" "roughcast")
    ("roughhewed" "roughhew")
    ("roughhewn" "roughhew")
    ("ran" "run")
    ("run" "run")
    ("sawed" "saw")
    ("sawn" "saw")
    ("said" "say")
    ("saw" "see")
    ("seen" "see")
    ("sought" "seek")
    ("sold" "sell")
    ("sent" "send")
    ("set" "set")
    ("sewed" "sew")
    ("sewn" "sew")
    ("shook" "shake")
    ("shaken" "shake")
    ("shaved" "shave")
    ("shaven" "shave")
    ("sheared" "shear")
    ("shore" "shear")
    ("shorn" "shear")
    ("shed" "shed")
    ("shone" "shine")
    ("shined" "shine")
    ("shit" "shit")
    ("shat" "shit")
    ("shitted" "shit")
    ("shod" "shoe")
    ("shoed" "shoe")
    ("shot" "shoot")
    ("showed" "show")
    ("shown" "show")
    ("shredded" "shred")
    ("shred" "shred")
    ("shrank" "shrink")
    ("shrunk" "shrink")
    ("shrunken" "shrink")
    ("shrived" "shrive")
    ("shrove" "shrive")
    ("shriven" "shrive")
    ("shut" "shut")
    ("sight-read" "sight-read")
    ("simulcast" "simulcast")
    ("simulcasted" "simulcast")
    ("sang" "sing")
    ("sung" "sing")
    ("sank" "sink")
    ("sunk" "sink")
    ("sunken" "sink")
    ("sat" "sit")
    ("sate" "sit")
    ("slew" "slay")
    ("slain" "slay")
    ("slept" "sleep")
    ("slid" "slide")
    ("slidden" "slide")
    ("slunk" "slink")
    ("smelled" "smell")
    ("smelt" "smell")
    ("smote" "smite")
    ("smitten" "smite")
    ("smit" "smite")
    ("sowed" "sow")
    ("sown" "sow")
    ("spoke" "speak")
    ("spoken" "speak")
    ("sped" "speed")
    ("speeded" "speed")
    ("spelled" "spell")
    ("spelt" "spell")
    ("spellbound" "spellbind")
    ("spent" "spend")
    ("spilled" "spill")
    ("spilt" "spill")
    ("spun" "spin")
    ("span" "spin")
    ("spat" "spit")
    ("spit" "spit")
    ("split" "split")
    ("spoiled" "spoil")
    ("spoilt" "spoil")
    ("spoon-fed" "spoon-feed")
    ("spread" "spread")
    ("sprang" "spring")
    ("sprung" "spring")
    ("stood" "stand")
    ("staved" "stave")
    ("stove" "stave")
    ("stayed" "stay")
    ("staid" "stay")
    ("stole" "steal")
    ("stolen" "steal")
    ("stuck" "stick")
    ("stung" "sting")
    ("stank" "stink")
    ("stunk" "stink")
    ("strewed" "strew")
    ("strewn" "strew")
    ("strode" "stride")
    ("stridden" "stride")
    ("struck" "strike")
    ("stricken" "strike")
    ("strung" "string")
    ("strove" "strive")
    ("striven" "strive")
    ("sublet" "sublet")
    ("sunburned" "sunburn")
    ("sunburnt" "sunburn")
    ("swore" "swear")
    ("sware" "swear")
    ("sworn" "swear")
    ("sweat" "sweat")
    ("sweated" "sweat")
    ("swept" "sweep")
    ("swelled" "swell")
    ("swollen" "swell")
    ("swam" "swim")
    ("swum" "swim")
    ("swung" "swing")
    ("took" "take")
    ("taken" "take")
    ("taught" "teach")
    ("tore" "tear")
    ("torn" "tear")
    ("telecast" "telecast")
    ("telecasted" "telecast")
    ("told" "tell")
    ("thought" "think")
    ("thrived" "thrive")
    ("throve" "thrive")
    ("thriven" "thrive")
    ("threw" "thrown")
    ("thrown" "thrown")
    ("thrust" "thrust")
    ("tossed" "toss")
    ("tost" "toss")
    ("trod" "tread")
    ("treaded" "tread")
    ("trode" "tread")
    ("trodden" "tread")
    ("typecast" "typecast")
    ("typewrote" "typewrite")
    ("typewritten" "typewrite")
    ("unbent" "unbend")
    ("unbended" "unbend")
    ("unbound" "unbind")
    ("underbid" "underbid")
    ("underbidden" "underbid")
    ("undercut" "undercut")
    ("underwent" "undergo")
    ("undergone" "undergo")
    ("underlaid" "underlay")
    ("underlay" "underlie")
    ("underlain" "underlie")
    ("underpaid" "underpay")
    ("undersold" "undersell")
    ("undershot" "undershoot")
    ("understood" "understand")
    ("undertook" "undertake")
    ("undertaken" "undertake")
    ("underwrote" "underwrite")
    ("underwritten" "underwrite")
    ("undid" "undo")
    ("undone" "undo")
    ("undrew" "undraw")
    ("undrawn" "undraw")
    ("ungirded" "ungird")
    ("ungirt" "ungird")
    ("unlearnt" "unlearn")
    ("unlearned" "unlearn")
    ("unmade" "unmake")
    ("unsaid" "unsay")
    ("unstuck" "unstick")
    ("unstrung" "unstring")
    ("unwound" "unwind")
    ("upheld" "uphold")
    ("uprose" "uprise")
    ("uprisen" "uprise")
    ("upset" "upset")
    ("upswept" "upsweep")
    ("woke" "wake")
    ("waked" "wake")
    ("woken" "wake")
    ("waylaid" "waylay")
    ("wore" "wear")
    ("worn" "wear")
    ("wove" "weave")
    ("weaved" "weave")
    ("woven" "weave")
    ("wed" "wed")
    ("wedded" "wed")
    ("wept" "weep")
    ("wended" "wend")
    ("wetted" "wet")
    ("wet" "wet")
    ("won" "win")
    ("wound" "wind")
    ("winded" "wind")
    ("wiredrew" "wiredraw")
    ("wiredrawn" "wiredraw")
    ("wist" "wit")
    ("withdrew" "withdraw")
    ("withdrawn" "withdraw")
    ("withheld" "withhold")
    ("withstood" "withstand")
    ("worked" "work")
    ("wrought" "work")
    ("wrapped" "wrap")
    ("wrapt" "wrap")
    ("wrung" "wring")
    ("wrote" "write")
    ("writ" "write")
    ("written" "write"))
  "$BIT5,B'F0;l$H867A$NO"A[G[Ns(B")


(defun stem:extra (str) "\
$BF0;l(B/$B7AMF;l$N3hMQ7A$HL>;l$NJ#?t7A$N3hMQ8lHx$r<h$j=|$/Hs8x3+4X?t(B
$BM?$($i$l$?8l$N867A$H$7$F2DG=@-$N$"$k8l$N%j%9%H$rJV$9(B"
  (or (assoc str stem:irregular-verb-alist)
      (let (c l stem)
	(setq l (cond
		 ;; $BHf3S5i(B/$B:G>e5i(B
		 ((stem:match "\\([^aeiou]\\)\\1e\\(r\\|st\\)$")
		  (list (substring str (match-beginning 1) (match-end 1))
			(substring str (match-beginning 0) (match-beginning 2))))
		 ((stem:match "\\([^aeiou]\\)ie\\(r\\|st\\)$")
		  (setq c (substring str (match-beginning 1) (match-end 1)))
		  (list c (concat c "y") (concat c "ie")))
		 ((stem:match "e\\(r\\|st\\)$") '("" "e"))
		 ;; 3$BC18=(B/$BJ#?t7A(B
		 ((stem:match "ches$") '("ch" "che"))
		 ((stem:match "shes$") '("sh" "che"))
		 ((stem:match "ses$") '("s" "se"))
		 ((stem:match "xes$") '("x" "xe"))
		 ((stem:match "zes$") '("z" "ze"))
		 ((stem:match "ves$") '("f" "fe"))
		 ((stem:match "\\([^aeiou]\\)oes$")
		  (setq c (substring str -4 -3))
		  (list c (concat c "o") (concat c "oe")))
		 ((stem:match "\\([^aeiou]\\)ies$")
		  (setq c (substring str -4 -3))
		  (list c (concat c "y") (concat c "ie")))
		 ((stem:match "es$") '("" "e"))
		 ((stem:match "s$") '(""))
		 ;; $B2a5n7A(B/$B2a5nJ,;l(B
		 ((stem:match "\\([^aeiou]\\)ied$")
		  (setq c (substring str -4 -3))
		  (list c (concat c "y") (concat c "ie")))
		 ((stem:match "\\([^aeiou]\\)\\1ed$")
		  (list (substring str -4 -3)
			(substring str -4 -1)))
		 ((stem:match "cked$") '("c" "cke"))
		 ((stem:match "ed$") '("" "e"))
		 ;; $B8=:_J,;l(B
		 ((stem:match "\\([^aeiou]\\)\\1ing$")
		  (list (substring str -5 -4)))
		 ((stem:match "ing$") '("" "e"))
		 ))
	(append (mapcar '(lambda (s) (concat stem s)) l)
		(list str))
	)))



;;;============================================================
;;;	$B8x3+4X?t(B
;;;============================================================

(defun stem:stripping-suffix (str) "\
$B3hMQ8lHx$r<h$j=|$/4X?t(B
$BM?$($i$l$?8l$N85$N8l$H$7$F2DG=@-$N$"$k8l$N<-=q=g$N%j%9%H$rJV$9(B"
  (save-match-data
    (delq nil (let ((w ""))
		(mapcar
		 (function (lambda (x) (if (string= x w) nil (setq w x))))
		 (sort (append
			;; $BBgJ8;z$r>.J8;z$KJQ49(B
			(list (prog1 str (setq str (downcase str))))
			;; $BFH<+$N%R%e!<%j%9%F%#%C%/%9$rE,MQ(B
			(stem:extra str)
			(if (> (length str) stem:minimum-word-length)
			    ;; $BC18lD9$,>r7o$rK~$?$;$P!"(BPorter $B$N%"%k%4%j%:%`$rE,MQ(B
			    (mapcar
			     '(lambda (func)
				(setq str (funcall func str)))
			     '(stem:step1 stem:step2 stem:step3 stem:step4 stem:step5))))
		       'string<))))))


;;;###autoload
(defun stem-english (str) "\
$B3hMQ8lHx$r<h$j=|$/4X?t(B
$BM?$($i$l$?8l$N85$N8l$H$7$F2DG=@-$N$"$k8l$NJ8;zNsD9$N>:=g$N%j%9%H$rJV$9(B"
  (sort (stem:stripping-suffix str)
	(function (lambda (a b) (< (length a) (length b))))))

;; $B$3$N(B stem-english $B$NF0:n$O!"(B
;; 
;;     Id: stem.el,v 1.4 1998/11/30 09:27:27 tsuchiya Exp tsuchiya
;; 
;; $B0JA0$N%P!<%8%g%s$N(B stem.el $B$GDj5A$5$l$F$$$?(B stem:stripping-suffix 
;; $B$NF0:n$H8_49$G$"$k!#8=:_$N(B stem:stripping-suffix $B$O<-=q=g$N%j%9%H$r(B
;; $BJV$9$?$a!"0[$J$kF0:n$H$9$k$h$&$K$J$C$F$$$k$N$GCm0U$9$k$3$H!#(B


;;; Porter $B$N%"%k%4%j%:%`$rE,MQ$9$k4X?t(B
(defun stem:stripping-inflection (word) "\
Porter $B$N%"%k%4%j%:%`$K4p$E$$$FGI@88l$r=hM}$9$k4X?t(B"
  (save-match-data
    (stem:step5
     (stem:step4
      (stem:step3
       (stem:step2
	(stem:step1 word)))))))
