# Examples
# Setup: attach install.load, then use its load_package() function to load
# the two packages needed by the examples (iemisctext and tm).
# It is assumed that both packages have already been installed.
library(install.load)
load_package("iemisctext", "tm") # load needed packages (assumed already installed)
# anarchy
# Example 1
# Lines beginning with "##" are the recorded output of the call above them.
# load the anarchy data set
data(anarchy)
# create a document term matrix of anarchy (no control options are passed)
a <- DocumentTermMatrix(anarchy)
# find terms used at least 5 times (the second argument is findFreqTerms'
# lower frequency bound, which is inclusive)
# NOTE(review): the output lists "`and", "`let" and "`thou" separately from
# "and", "let" and "thou" -- presumably because punctuation is not stripped
# when the matrix is built; confirm this is intended.
findFreqTerms(a, 5)
## [1] "`'tis" "`and" "`let" "`thou" "all" "and" "are"
## [8] "art" "blood" "but" "earth" "england" "english" "every"
## [15] "for" "from" "had" "have" "her" "his" "its"
## [22] "let" "like" "make" "not" "one" "over" "own"
## [29] "such" "that" "the" "their" "them" "they" "those"
## [36] "thou" "through" "thy" "upon" "was" "when" "where"
## [43] "which" "who" "will" "with" "your"
# Zipf's law
# The two values printed below are labelled as an intercept and a
# coefficient for x (presumably the fitted line -- see the tm documentation).
Zipf_plot(a)
## (Intercept) x
## 3.4810675 -0.5440241
# war_prayer
# Example 1
# Lines beginning with "##" are the recorded output of the call above them.
# load the war_prayer data set
data(war_prayer)
# create a document term matrix of war_prayer
wp <- DocumentTermMatrix(war_prayer)
# find terms used at least 5 times (the second argument is findFreqTerms'
# lower frequency bound, which is inclusive)
findFreqTerms(wp, 5)
## [1] "and" "for" "from" "god" "have" "help" "his"
## [8] "our" "prayed" "that" "the" "their" "them" "upon"
## [15] "war" "was" "which" "who" "with" "you" "your"
# Zipf's law
# The two values printed below are labelled as an intercept and a
# coefficient for x (presumably the fitted line -- see the tm documentation).
Zipf_plot(wp)
## (Intercept) x
## 2.824209 -0.478659
# war_racket
# Example 1
# Lines beginning with "##" are the recorded output of the call above them.
# load the war_racket data set
data(war_racket)
# create a document term matrix of war_racket (no control options are passed)
wr <- DocumentTermMatrix(war_racket)
# find terms used at least 5 times (the second argument is findFreqTerms'
# lower frequency bound, which is inclusive)
# NOTE(review): the output counts "war", "war," and "war." (likewise "it,"
# and "it.", "profits" and "profits.") as distinct terms -- presumably because
# punctuation is not stripped when the matrix is built; confirm this is
# intended.
findFreqTerms(wr, 5)
## [1] "about" "all" "american" "and"
## [5] "another" "any" "are" "arms"
## [9] "average" "back" "bankers" "been"
## [13] "before" "being" "bonds" "bought"
## [17] "boys" "business" "but" "called"
## [21] "came" "can" "can't" "cannot"
## [25] "capital" "cent" "cent." "chapter"
## [29] "cost" "could" "did" "does"
## [33] "dollars" "don't" "during" "each"
## [37] "earnings" "eighteen" "end" "even"
## [41] "every" "few" "for" "france"
## [45] "from" "general" "germany" "get"
## [49] "got" "government" "great" "had"
## [53] "has" "have" "his" "home"
## [57] "how" "huge" "hundred" "increase"
## [61] "industry" "international" "into" "it,"
## [65] "it." "its" "just" "kill"
## [69] "less" "let" "let's" "liberty"
## [73] "like" "limit" "little" "lot"
## [77] "made" "make" "manufacturers" "many"
## [81] "men" "might" "more" "mosquito"
## [85] "munitions" "must" "nation" "never"
## [89] "no." "normal" "not" "now"
## [93] "off" "one" "only" "other"
## [97] "our" "out" "over" "own"
## [101] "paid" "part" "pay" "pays"
## [105] "people" "per" "period" "profit"
## [109] "profits" "profits." "put" "racket"
## [113] "sam" "same" "ships" "should"
## [117] "smash" "sold" "soldier" "soldiers"
## [121] "some" "states" "steel" "still"
## [125] "take" "than" "that" "the"
## [129] "their" "them" "then" "there"
## [133] "these" "they" "this" "those"
## [137] "thousands" "three" "time" "told"
## [141] "too" "too." "total" "two"
## [145] "uncle" "united" "until" "upon"
## [149] "use" "very" "war" "war,"
## [153] "war." "was" "well" "well,"
## [157] "went" "were" "what" "when"
## [161] "where" "whether" "who" "will"
## [165] "with" "without" "world" "would"
## [169] "year" "year." "yearly" "years"
## [173] "yes," "you" "your"
# Zipf's law
# The two values printed below are labelled as an intercept and a
# coefficient for x (presumably the fitted line -- see the tm documentation).
Zipf_plot(wr)
## (Intercept) x
## 5.212469 -0.728547
# connect_dots
# Example 1
# Lines beginning with "##" are the recorded output of the call above them.
# load the connect_dots data set
data(connect_dots)
# create a document term matrix of connect_dots
cd <- DocumentTermMatrix(connect_dots)
# find terms used at least 5 times (the second argument is findFreqTerms'
# lower frequency bound, which is inclusive)
findFreqTerms(cd, 5)
## [1] "and" "for" "has" "have" "must" "our"
## [7] "petroleum" "that" "the"
# Zipf's law
# The two values printed below are labelled as an intercept and a
# coefficient for x (presumably the fitted line -- see the tm documentation).
Zipf_plot(cd)
## (Intercept) x
## 2.5493059 -0.4666954
# us_them
# Example 1
# Lines beginning with "##" are the recorded output of the call above them.
# load the us_them data set
data(us_them)
# create a document term matrix of us_them
ut <- DocumentTermMatrix(us_them)
# find terms used at least 5 times (the second argument is findFreqTerms'
# lower frequency bound, which is inclusive)
findFreqTerms(ut, 5)
## [1] "all" "and" "for" "freedom" "gave" "know" "not"
## [8] "our" "tha" "the" "their" "them" "they" "will"
## [15] "won't"
# Zipf's law
# The two values printed below are labelled as an intercept and a
# coefficient for x (presumably the fitted line -- see the tm documentation).
Zipf_plot(ut)
## (Intercept) x
## 2.9193372 -0.5665568
# climate_strange
# Example 1
# Lines beginning with "##" are the recorded output of the call above them.
# load the climate_strange data set
data(climate_strange)
# create a document term matrix of climate_strange
cs <- DocumentTermMatrix(climate_strange)
# find terms used at least 5 times (the second argument is findFreqTerms'
# lower frequency bound, which is inclusive)
findFreqTerms(cs, 5)
## [1] "all" "are" "can" "earth" "for" "have" "life"
## [8] "must" "not" "our" "science" "that" "the" "this"
## [15] "your"
# Zipf's law
# The two values printed below are labelled as an intercept and a
# coefficient for x (presumably the fitted line -- see the tm documentation).
Zipf_plot(cs)
## (Intercept) x
## 2.2297149 -0.3823361
# Setup for the Example 2 calls: attach install.load, then use its
# load_package() function to load the two needed packages (iemisctext and tm).
# It is assumed that both packages have already been installed.
# NOTE(review): presumably repeated here so that this section can be run on
# its own -- confirm.
library(install.load)
load_package("iemisctext", "tm") # load needed packages (assumed already installed)
# Example 2 for each data set: load it, then view its first element as text.
# No output is recorded for the inspect() calls below.
# anarchy
# Example 2
data(anarchy)
# View text representation of the first element ([[1]]) of anarchy
inspect(anarchy[[1]])
# war_prayer
# Example 2
data(war_prayer)
# View text representation of the first element ([[1]]) of war_prayer
inspect(war_prayer[[1]])
# war_racket
# Example 2
data(war_racket)
# View text representation of the first element ([[1]]) of war_racket
inspect(war_racket[[1]])
# connect_dots
# Example 2
data(connect_dots)
# View text representation of the first element ([[1]]) of connect_dots
inspect(connect_dots[[1]])
# us_them
# Example 2
data(us_them)
# View text representation of the first element ([[1]]) of us_them
inspect(us_them[[1]])
# climate_strange
# Example 2
data(climate_strange)
# View text representation of the first element ([[1]]) of climate_strange
inspect(climate_strange[[1]])
LS0tCnRpdGxlOiAiRXhhbXBsZSBBbmFseXNpcyB1c2luZyB0aGUgdG0gcGFja2FnZSIKYXV0aG9yOiAiSXJ1Y2thIEVtYnJ5IgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKICBybWFya2Rvd246Omh0bWxfZG9jdW1lbnQ6CiAgICBoaWdobGlnaHQ6IGthdGUKICAgIHRoZW1lOiByZWFkYWJsZQogICAgY29kZV9kb3dubG9hZDogVFJVRQp2aWduZXR0ZTogPgogICVcVmlnbmV0dGVJbmRleEVudHJ5e0V4YW1wbGVzIHVzaW5nIHRoZSB0bSBwYWNrYWdlfQogICVcVmlnbmV0dGVFbmdpbmV7a25pdHI6OnJtYXJrZG93bn0KICAlXFZpZ25ldHRlRW5jb2Rpbmd7VVRGLTh9Ci0tLQoKIyBFeGFtcGxlcwoKCmBgYHtyLCB3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRX0KbGlicmFyeShpbnN0YWxsLmxvYWQpCmxvYWRfcGFja2FnZSgiaWVtaXNjdGV4dCIsICJ0bSIpICMgbG9hZCBuZWVkZWQgcGFja2FnZXMgdXNpbmcgdGhlIGxvYWRfcGFja2FnZSBmdW5jdGlvbiBmcm9tIHRoZSBpbnN0YWxsLmxvYWQgcGFja2FnZSAoaXQgaXMgYXNzdW1lZCB0aGF0IHlvdSBoYXZlIGFscmVhZHkgaW5zdGFsbGVkIHRoZXNlIHBhY2thZ2VzKQoKCiMgYW5hcmNoeQojIEV4YW1wbGUgMQoKZGF0YShhbmFyY2h5KQoKIyBjcmVhdGUgYSBkb2N1bWVudCB0ZXJtIG1hdHJpeCBvZiBhbmFyY2h5CmEgPC0gRG9jdW1lbnRUZXJtTWF0cml4KGFuYXJjaHkpCgojIGZpbmQgdGVybXMgdXNlZCA+IDUgdGltZXMKZmluZEZyZXFUZXJtcyhhLCA1KQoKIyBaaXBmJ3MgbGF3ClppcGZfcGxvdChhKQoKCgoKIyB3YXJfcHJheWVyCiMgRXhhbXBsZSAxCgpkYXRhKHdhcl9wcmF5ZXIpCgojIGNyZWF0ZSBhIGRvY3VtZW50IHRlcm0gbWF0cml4IG9mIHdhcl9wcmF5ZXIKd3AgPC0gRG9jdW1lbnRUZXJtTWF0cml4KHdhcl9wcmF5ZXIpCgojIGZpbmQgdGVybXMgdXNlZCA+IDUgdGltZXMKZmluZEZyZXFUZXJtcyh3cCwgNSkKCiMgWmlwZidzIGxhdwpaaXBmX3Bsb3Qod3ApCgoKCgojIHdhcl9yYWNrZXQKIyBFeGFtcGxlIDEKCmRhdGEod2FyX3JhY2tldCkKCiMgY3JlYXRlIGEgZG9jdW1lbnQgdGVybSBtYXRyaXggb2Ygd2FyX3JhY2tldAp3ciA8LSBEb2N1bWVudFRlcm1NYXRyaXgod2FyX3JhY2tldCkKCiMgZmluZCB0ZXJtcyB1c2VkID4gNSB0aW1lcwpmaW5kRnJlcVRlcm1zKHdyLCA1KQoKIyBaaXBmJ3MgbGF3ClppcGZfcGxvdCh3cikKCgoKCiMgY29ubmVjdF9kb3RzCiMgRXhhbXBsZSAxCgpkYXRhKGNvbm5lY3RfZG90cykKCiMgY3JlYXRlIGEgZG9jdW1lbnQgdGVybSBtYXRyaXggb2YgY29ubmVjdF9kb3RzCmNkIDwtIERvY3VtZW50VGVybU1hdHJpeChjb25uZWN0X2RvdHMpCgojIGZpbmQgdGVybXMgdXNlZCA+IDUgdGltZXMKZmluZEZyZXFUZXJtcyhjZCwgNSkKCiMgWmlwZidzIGxhdwpaaXBmX3Bsb3QoY2QpCgoKCgojIHVzX3RoZW0KIyBFeGFtcGxlIDEKCmRhdGEodXNfdGhlbSkKCiMgY3JlYXRlIGEgZG9jdW1lbnQgdGVybSBtYXRyaXggb2YgdXNfdGhl
bQp1dCA8LSBEb2N1bWVudFRlcm1NYXRyaXgodXNfdGhlbSkKCiMgZmluZCB0ZXJtcyB1c2VkID4gNSB0aW1lcwpmaW5kRnJlcVRlcm1zKHV0LCA1KQoKIyBaaXBmJ3MgbGF3ClppcGZfcGxvdCh1dCkKCgoKCiMgY2xpbWF0ZV9zdHJhbmdlCiMgRXhhbXBsZSAxCgpkYXRhKGNsaW1hdGVfc3RyYW5nZSkKCiMgY3JlYXRlIGEgZG9jdW1lbnQgdGVybSBtYXRyaXggb2YgY2xpbWF0ZV9zdHJhbmdlCmNzIDwtIERvY3VtZW50VGVybU1hdHJpeChjbGltYXRlX3N0cmFuZ2UpCgojIGZpbmQgdGVybXMgdXNlZCA+IDUgdGltZXMKZmluZEZyZXFUZXJtcyhjcywgNSkKCiMgWmlwZidzIGxhdwpaaXBmX3Bsb3QoY3MpCmBgYAoKCmBgYHtyIGV2YWwgPSBGQUxTRX0KbGlicmFyeShpbnN0YWxsLmxvYWQpCmxvYWRfcGFja2FnZSgiaWVtaXNjdGV4dCIsICJ0bSIpICMgbG9hZCBuZWVkZWQgcGFja2FnZXMgdXNpbmcgdGhlIGxvYWRfcGFja2FnZSBmdW5jdGlvbiBmcm9tIHRoZSBpbnN0YWxsLmxvYWQgcGFja2FnZSAoaXQgaXMgYXNzdW1lZCB0aGF0IHlvdSBoYXZlIGFscmVhZHkgaW5zdGFsbGVkIHRoZXNlIHBhY2thZ2VzKQoKIyBhbmFyY2h5CiMgRXhhbXBsZSAyCgpkYXRhKGFuYXJjaHkpCgojIFZpZXcgdGV4dCByZXByZXNlbnRhdGlvbiBvZiBhbmFyY2h5Cmluc3BlY3QoYW5hcmNoeVtbMV1dKQoKCgoKIyB3YXJfcHJheWVyCiMgRXhhbXBsZSAyCgpkYXRhKHdhcl9wcmF5ZXIpCgojIFZpZXcgdGV4dCByZXByZXNlbnRhdGlvbiBvZiB3YXJfcHJheWVyCmluc3BlY3Qod2FyX3ByYXllcltbMV1dKQoKCgoKIyB3YXJfcmFja2V0CiMgRXhhbXBsZSAyCgpkYXRhKHdhcl9yYWNrZXQpCgojIFZpZXcgdGV4dCByZXByZXNlbnRhdGlvbiBvZiB3YXJfcmFja2V0Cmluc3BlY3Qod2FyX3JhY2tldFtbMV1dKQoKCgoKIyBjb25uZWN0X2RvdHMKIyBFeGFtcGxlIDIKCmRhdGEoY29ubmVjdF9kb3RzKQoKIyBWaWV3IHRleHQgcmVwcmVzZW50YXRpb24gb2YgY29ubmVjdF9kb3RzCmluc3BlY3QoY29ubmVjdF9kb3RzW1sxXV0pCgoKCgojIHVzX3RoZW0KIyBFeGFtcGxlIDIKCmRhdGEodXNfdGhlbSkKCiMgVmlldyB0ZXh0IHJlcHJlc2VudGF0aW9uIG9mIHVzX3RoZW0KaW5zcGVjdCh1c190aGVtW1sxXV0pCgoKCgojIGNsaW1hdGVfc3RyYW5nZQojIEV4YW1wbGUgMgoKZGF0YShjbGltYXRlX3N0cmFuZ2UpCgojIFZpZXcgdGV4dCByZXByZXNlbnRhdGlvbiBvZiBjbGltYXRlX3N0cmFuZ2UKaW5zcGVjdChjbGltYXRlX3N0cmFuZ2VbWzFdXSkKYGBgCg==