changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / lisp/lib/nlp/data.lisp

changeset 698: 96958d3eb5b0
parent: 9b573fc6bc40
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
;;; Package holding the language data used for NLP: the LANGUAGE-DATA class,
;;; its stop-word accessors, and the *LANGUAGE-DATA* global.
(defpackage #:nlp/data
  (:use #:cl #:std)
  (:export #:language-data #:*language-data*
           #:stop-words-lookup #:stop-words))

(in-package #:nlp/data)
10 
(defclass language-data ()
  ((stop-words
    :accessor stop-words
    :initarg :stop-words
    :documentation "List of stop-word strings. The default is evaluated anew for
every instance, so each instance owns a fresh list."
    :initform
    (list "a" "able" "about" "above" "according" "accordingly" "across" "actually" "after"
          "afterwards" "again" "against" "ain't" "all" "allow" "allows" "almost" "alone"
          "along" "already" "also" "although" "always" "am" "among" "amongst" "an" "and"
          "another" "any" "anybody" "anyhow" "anyone" "anything" "anyway" "anyways" "anywhere"
          "apart" "appear" "appreciate" "appropriate" "are" "aren't" "around" "as" "a's" "aside"
          "ask" "asking" "associated" "at" "available" "away" "awfully" "be" "became" "because" "become"
          "becomes" "becoming" "been" "before" "beforehand" "behind" "being" "believe" "below" "beside"
          "besides" "best" "better" "between" "beyond" "both" "brief" "but" "by" "came" "can" "cannot"
          "cant" "can't" "cause" "causes" "certain" "certainly" "changes" "clearly" "c'mon" "co" "com"
          "come" "comes" "concerning" "consequently" "consider" "considering" "contain" "containing"
          "contains" "corresponding" "could" "couldn't" "course" "c's" "currently" "definitely" "described"
          "despite" "did" "didn't" "different" "do" "does" "doesn't" "doing" "don" "done" "don't" "down"
          "downwards" "during" "each" "edu" "eg" "eight" "either" "else" "elsewhere" "enough" "entirely"
          "especially" "et" "etc" "even" "ever" "every" "everybody" "everyone" "everything" "everywhere"
          "ex" "exactly" "example" "except" "far" "few" "fifth" "first" "five" "followed" "following" "follows"
          "for" "former" "formerly" "forth" "four" "from" "further" "furthermore" "get" "gets" "getting" "given"
          "gives" "go" "goes" "going" "gone" "got" "gotten" "greetings" "had" "hadn't" "happens" "hardly" "has"
          "hasn't" "have" "haven't" "having" "he" "he'd" "he'll" "hello" "help" "hence" "her" "here" "hereafter"
          "hereby" "herein" "here's" "hereupon" "hers" "herself" "he's" "hi" "him" "himself" "his" "hither"
          "hopefully" "how" "howbeit" "however" "how's" "i" "i'd" "ie" "if" "ignored" "i'll" "i'm" "immediate"
          "in" "inasmuch" "inc" "indeed" "indicate" "indicated" "indicates" "inner" "insofar" "instead" "into"
          "inward" "is" "isn't" "it" "it'd" "it'll" "its" "it's" "itself" "i've" "just" "keep" "keeps" "kept"
          "know" "known" "knows" "last" "lately" "later" "latter" "latterly" "least" "less" "lest" "let" "let's"
          "like" "liked" "likely" "little" "look" "looking" "looks" "ltd" "mainly" "many" "may" "maybe" "me"
          "mean" "meanwhile" "merely" "might" "more" "moreover" "most" "mostly" "much" "must" "mustn't" "my"
          "myself" "name" "namely" "nd" "near" "nearly" "necessary" "need" "needs" "neither" "never" "nevertheless"
          "new" "next" "nine" "no" "nobody" "non" "none" "noone" "nor" "normally" "not" "nothing" "novel" "now"
          "nowhere" "obviously" "of" "off" "often" "oh" "ok" "okay" "old" "on" "once" "one" "ones" "only" "onto"
          "or" "other" "others" "otherwise" "ought" "our" "ours" "ourselves" "out" "outside" "over" "overall" "own"
          "particular" "particularly" "per" "perhaps" "placed" "please" "plus" "possible" "presumably" "probably"
          "provides" "que" "quite" "qv" "rather" "rd" "re" "really" "reasonably" "regarding" "regardless" "regards"
          "relatively" "respectively" "right" "s" "said" "same" "saw" "say" "saying" "says" "second" "secondly" "see"
          "seeing" "seem" "seemed" "seeming" "seems" "seen" "self" "selves" "sensible" "sent" "serious" "seriously"
          "seven" "several" "shall" "shan't" "she" "she'd" "she'll" "she's" "should" "shouldn't" "since" "six" "so"
          "some" "somebody" "somehow" "someone" "something" "sometime" "sometimes" "somewhat" "somewhere" "soon" "sorry"
          "specified" "specify" "specifying" "still" "sub" "such" "sup" "sure" "t" "take" "taken" "tell" "tends" "th"
          "than" "thank" "thanks" "thanx" "that" "thats" "that's" "the" "their" "theirs" "them" "themselves" "then"
          "thence" "there" "thereafter" "thereby" "therefore" "therein" "theres" "there's" "thereupon" "these" "they"
          "they'd" "they'll" "they're" "they've" "think" "third" "this" "thorough" "thoroughly" "those" "though"
          "three" "through" "throughout" "thru" "thus" "to" "together" "too" "took" "toward" "towards" "tried"
          "tries" "truly" "try" "trying" "t's" "twice" "two" "un" "under" "unfortunately" "unless" "unlikely"
          "until" "unto" "up" "upon" "us" "use" "used" "useful" "uses" "using" "usually" "value" "various" "very"
          "via" "viz" "vs" "want" "wants" "was" "wasn't" "way" "we" "we'd" "welcome" "well" "we'll" "went" "were"
          "we're" "weren't" "we've" "what" "whatever" "what's" "when" "whence" "whenever" "when's" "where"
          "whereafter" "whereas" "whereby" "wherein" "where's" "whereupon" "wherever" "whether" "which" "while"
          "whither" "who" "whoever" "whole" "whom" "who's" "whose" "why" "why's" "will" "willing" "wish" "with"
          "within" "without" "wonder" "won't" "would" "wouldn't" "yes" "yet" "you" "you'd" "you'll" "your"
          "you're" "yours" "yourself" "yourselves" "you've" "zero"))
   (stop-words-lookup
    :accessor stop-words-lookup
    :documentation "Lookup structure for the stop words. The slot has no initarg
and no initform, so it stays unbound until something assigns it."))
  (:documentation "Language data: a list of stop words and a lookup slot for them."))
65 
(defun rebuild-stop-words-lookup (data)
  "Recompute DATA's STOP-WORDS-LOOKUP from its current STOP-WORDS.
Stores and returns a new EQUAL hash table that maps every stop word to T."
  (let ((table (make-hash-table :test #'equal)))
    ;; MAP walks any sequence, so stop words supplied as a vector are indexed
    ;; just like the default list.
    (map nil
         (lambda (word) (setf (gethash word table) t))
         (stop-words data))
    (setf (stop-words-lookup data) table)))

(defmethod initialize-instance :after ((data language-data) &key)
  "Build the stop-word lookup table once the instance's slots are initialized."
  (rebuild-stop-words-lookup data))

(defmethod (setf stop-words) :after (new-words (data language-data))
  "Rebuild the lookup table whenever the stop words are replaced through the
accessor, so STOP-WORDS-LOOKUP never describes an outdated word set."
  (declare (ignore new-words))
  (rebuild-stop-words-lookup data))
72 
(defparameter *language-data*
  (make-instance 'language-data)
  "Global LANGUAGE-DATA instance, created with no initargs when this form is evaluated.")