First 100 words:
>>> text6[:100]
['SCENE', '1', ':', '[', 'wind', ']', '[', 'clop', 'clop', 'clop', ']', 'KING', 'ARTHUR', ':', 'Whoa', 'there', '!', '[', 'clop', 'clop', 'clop', ']', 'SOLDIER', '#', '1', ':', 'Halt', '!', 'Who', 'goes', 'there', '?', 'ARTHUR', ':', 'It', 'is', 'I', ',', 'Arthur', ',', 'son', 'of', 'Uther', 'Pendragon', ',', 'from', 'the', 'castle', 'of', 'Camelot', '.', 'King', 'of', 'the', 'Britons', ',', 'defe
ator', 'of', 'the', 'Saxons', ',', 'sovereign', 'of', 'all', 'England', '!', 'SOLDIER', '#', '1', ':', 'Pull', 'the', 'other', 'one', '!', 'ARTHUR', ':', 'I', 'am', ',', '...', 'and', 'this', 'is', 'my', 'trusty', 'servant', 'Patsy', '.', 'We', 'have', 'ridden', 'the', 'length', 'and', 'breadth', 'of', 'the', 'land', 'in']
Playing around:
>>> text9
<Text: The Man Who Was Thursday by G . K . Chesterton 1908>
>>> text9.generate()
Building ngram index...
[ The Man Who Was Thursday by G . K . C . CHAPTER VI THE EXPOSURE SUCH
were the dynamiters . I ' m sorry to cut short finally as by an
innocent courtesy characteristic of him now with their flowers and
frock - coat , he began to look at the bright sea below , and was seen
properly , the eerie sensation of having strayed into a dim , cold ,
empty room , talking rapidly . " I will waste no more than I . I am
not a word in Comrade Gregory ' s where
>>> text9.concordance("bacon")
Building index...
Displaying 2 of 2 matches:
h Republic in Paris , and over their bacon and eggs upon their sunny balcony th
both very thoroughly . The beans and bacon , which these unaccountable people c
>>> text8
<Text: Personals Corpus>
>>> " ".join(text8[20:200])
'in uniform for fun times . 40 yo SINGLE DAD , sincere friendly DTE seeks r / sh
ip with fem age open S / E 44yo tall seeks working single mum or lady below 45 f
ship rship . Nat Open 6 . 2 35 yr old OUTGOING M seeks fem 28 - 35 for o / door
sports - w / e away A professional business male , late 40s , 6 feet tall , slim
build , well groomed , great personality , home owner , interests include the a
rts travel and all things good , Ringwood area , is seeking a genuine female of
similar age or older , in same area or surrounds , for a meaningful long term rs
hip . Looking forward to hearing from you all . ABLE young man seeks , sexy olde
r women . Phone for fun ready to play AFFECTIONATE LADY Sought by generous guy ,
40s , mutual fulfillment ARE YOU ALONE or lost in a r / ship too , with no hope
in sight ? Maybe we could explore'
>>> for i in range(0, 20): text3.generate(5);
...
In the cave of Machpelah
In the sweat of thy
In the selfsame day ,
In the selfsame day ,
In the cave which was
In the day of the
In the cave that is
In the cave that is
In the selfsame day was
In the selfsame day ,
In the beginning God created
In the day , that
In the six hundredth year
In the same became mighty
In the selfsame day entered
In the cave of the
In the mount Gilead .
In the day of the
In the selfsame day was
In the sweat of thy
>>> text = "";
>>> for i in range(0, 500):
... text += text3[random.randint(0, 1000)] + " ";
...
>>> text
'earth firmament firmament of his every . and And the called upon it Let every t
he the heaven was that made face it . above all the over And Let living every an
d earth , he earth that host fourth the firmament over it multiply yielding abun
dantly was of the and also two the made the and . ki that light morning called d
ay called that saw itself was signs his , said every of and morning his fourth A
nd heaven earth yielding make be earth and , there our and ; and the them and th
e the , God his . them , the brought And God Spirit , every every , there the th
e that and the he And called was the and the blessed the tree the he work And ov
er And it said . the it the the blessed , it the , host the fowl the , in moveth
so heavens was and firmament kind and the Be ki it and light heaven of , seed w
hose for fish the the from over it itself , of the thing firmament And so the Go
d And was of the the was them ; in yea Behold for the heaven man were creepeth a
re herb good that And the In living all the have God , earth two day shall , ear
th . our the were forth the . living Thus forth and that which Behold the days t
o , the rule fowl the for them the that rule the multiply the Spirit morning lif
e the the that were so , in because ki waters he his night the fruit them day cr
eature the And And upon in the the , . whose the saw a creeping heaven the , was
for seventh God fish be the firmament God created the Let every moving earth ru
le were yielding which the was and Let And created of to in the and forth all br
ought green day the and thing he , God , Let in all man over and all and and the
the Se of signs have two earth . the the multiply their be in and beginning mak
e the to dominion abundantly said over living upon lights God of ; day ; the Se
firmament all saw the the thing created every which every the whose man face eve
ning , and the night , to said thing the the yielding the work was of he itself
was was have . that made morning was and bring every the fruitful of the and for
th of have ki and fish seed the and called And the and be itself light them And
. rule the firmament Let the so which brought the , . and and the of God on the
, it the be ear which and of . , the the light thing made moveth ; saw it God fr
uit let of upon , and the seed the earth generations And that earth the fifth cr
eature '
List comprehension stuff:
# Small demo of list comprehensions: filtering, mapping, and flattening.
a = range(1, 100)
print(a)

# Filter: keep only the odd members of the range.
print("\r\n.. extracting odd numbers ..\r\n")
w = [n for n in a if n % 2 == 1]
print(w)

# Map: double every odd number that survived the filter.
print("\r\n.. multiplied by two ..\r\n")
w = [n * 2 for n in w]
print(w)

# Flatten: drop the spaces between words, upper-case each remaining
# character individually, then re-join the characters with single spaces.
print("\r\n.. the quick brown fox ..\r\n")
s = "the quick brown fox"
print(s)
s = " ".join(ch.upper() for ch in "".join(s.split(" ")))
print(s);
Regular expressions.
>>> edwords = [w for w in words if re.search('^[aeiouAEIOU]{3}$', w)]
>>> edwords
['iao', 'oii']
>>> newwords = [w for w in words if re.search('^[aeiouAEIOU][^aeiouAEIOU][aeiouA
EIOU]$', w)];
>>> newwords
['aba', 'Abe', 'Abo', 'Abu', 'abu', 'ace', 'Ada', 'Ade', 'ade', 'ado', 'aga', 'a
ge', 'ago', 'aha', 'aho', 'ahu', 'Aka', 'aka', 'ake', 'ako', 'aku', 'ala', 'ale'
, 'alo', 'ama', 'ame', 'Ami', 'ami', 'Ana', 'ana', 'ani', 'apa', 'ape', 'ara', '
are', 'Aro', 'aru', 'Asa', 'ase', 'Ata', 'ate', 'Ati', 'ava', 'Ave', 'ave', 'avo
', 'awa', 'awe', 'axe', 'aye', 'ayu', 'azo', 'Edo', 'ego', 'eke', 'Eli', 'eme',
'emu', 'era', 'ere', 'eta', 'Eva', 'Eve', 'eve', 'Ewe', 'ewe', 'eye', 'iba', 'Ib
o', 'ice', 'Ida', 'ide', 'Ido', 'ife', 'ihi', 'Ijo', 'Ike', 'Ila', 'Ima', 'imi',
'imu', 'Ino', 'Ira', 'ire', 'iso', 'Ita', 'Ito', 'iva', 'iwa', 'iyo', 'obe', 'o
bi', 'oda', 'ode', 'Ofo', 'oho', 'oka', 'oki', 'Ole', 'Ona', 'ona', 'one', 'ope'
, 'ora', 'ore', 'ose', 'Oto', 'Ova', 'ova', 'owe', 'ubi', 'Uca', 'Udi', 'udo', '
uji', 'uke', 'ula', 'ule', 'ulu', 'ume', 'umu', 'Una', 'upo', 'ura', 'ure', 'Uri
', 'Uro', 'Uru', 'use', 'Uta', 'uta', 'Ute', 'utu', 'uva']
>>> newwords = [w for w in words if re.search(r'([aeiouAEIOU])[^\1]\1', w)];
>>> newwords = [w for w in words if re.search('^[aeiouAEIOU][^aeiouAEIOU][aeiouA
EIOU]{2}$', w)];
>>> newwords
['Abie', 'Adai', 'Agao', 'Agau', 'agee', 'agio', 'agua', 'ague', 'akee', 'akia',
'Alea', 'alee', 'aloe', 'Amia', 'anoa', 'apii', 'apio', 'aqua', 'aquo', 'area',
'aria', 'arui', 'awee', 'Eboe', 'eboe', 'edea', 'eheu', 'ejoo', 'Ekoi', 'Elia',
'epee', 'eria', 'Erie', 'etua', 'etui', 'Evea', 'evoe', 'idea', 'ilia', 'Inia',
'Itea', 'Ixia', 'oboe', 'ogee', 'ohia', 'Ohio', 'okee', 'okia', 'Okie', 'Olea',
'oleo', 'olio', 'omao', 'oxea', 'Ubii', 'Ulua', 'ulua', 'unau', 'unie', 'Unio',
'unio', 'urao', 'urea', 'Uria', 'usee', 'utai', 'uvea']
>>> newwords = [w for w in words if re.search('^[aeiouAEIOU][^aeiouAEIOU][aeiouA
EIOU]{3}$', w)];
>>> newwords
['adieu', 'Araua', 'Arioi', 'iliau', 'Umaua']
>>> newwords = [w for w in words if re.search('[aeiouAEIOU][^aeiouAEIOU][aeiouAEIOU]{2,}', w) and len(w) == 4]
>>> newwords
['Abie', 'Adai', 'Agao', 'Agau', 'agee', 'agio', 'agua', 'ague', 'akee', 'akia', 'Alea', 'alee', 'aloe', 'Amia', 'anoa', 'apii', 'apio', 'aqua', 'aquo', 'area', 'aria', 'arui', 'awee', 'Eboe', 'eboe', 'edea', 'eheu', 'ejoo', 'Ekoi', 'Elia', 'epee', 'eria', 'Erie', 'etua', 'etui', 'Evea', 'evoe', 'idea', 'ilia', 'Inia', 'Itea', 'Ixia', 'oboe', 'ogee', 'ohia', 'Ohio', 'okee', 'okia', 'Okie', 'Olea', 'oleo', 'olio', 'omao', 'oxea', 'Ubii', 'Ulua', 'ulua', 'unau', 'unie', 'Unio', 'unio', 'urao', 'urea', 'Uria', 'usee', 'utai', 'uvea']
>>> newwords = [w for w in words if re.search('[aeiouAEIOU][^aeiouAEIOU][aeiouAEIOU]{2,}', w) and len(w) == 5]
>>> newwords
['aalii', 'abeam', 'abear', 'Abies', 'aboil', 'aboon', 'about', 'acier', 'acoin', 'Acuan', 'adead', 'adeem', 'adeep', 'Adiel', 'adieu', 'adion', 'Aequi', 'aerie', 'aevia', 'afear', 'afoam', 'afoot', 'afoul', 'again', 'Agiel', 'aguey', 'ahead', 'aheap', 'Ailie', 'Aimee', 'ainoi', 'Alain', 'aleak', 'Aleut', 'alias', 'alien', 'aloed', 'aloid', 'aloin', 'Alois', 'aloof', 'aloud', 'amaas', 'amain', 'ameed', 'ameen', 'amour', 'anear', 'anion', 'anoil', 'Anous', 'Aotea', 'apaid', 'apeak', 'apian', 'apiin', 'Apios', 'Apium', 'apoop', 'apout', 'arain', 'Araua', 'aread', 'areal', 'Arean', 'arear', 'areek', 'areel', 'Arian', 'Ariel', 'ariel', 'Aries', 'Arioi', 'Arion', 'ariot', 'Arius', 'aroar', 'aroid', 'aroon', 'Aruac', 'Asian', 'asoak', 'atour', 'audio', 'aulae', 'auloi', 'aurae', 'avail', 'avian', 'avoid', 'await', 'aweek', 'aweel', 'axial', 'axiom', 'axion', 'axoid', 'ayous', 'azoic', 'babai', 'baboo', 'Babua', 'bacao', 'Bahai', 'bahoe', 'bahoo', 'Bajau', 'bakie', 'balai', 'Balao', 'balao', 'balei', 'baloo', 'baria', 'barie', 'baroi', 'batea', 'bayou', 'bazoo', 'belee', 'belie', 'Beroe', 'besee', 'bevue', 'Bibio', 'Bihai', 'bijou', 'bilio', 'bogie', 'bogue', 'bohea', 'boree', 'bowie', 'burao', 'Butea', 'Buteo', 'cabio', 'cacao', 'cadua', 'cameo', 'canoe', 'caroa', 'carua', 'cavae', 'Cavia', 'cavie', 'Celia', 'cequi', 'ceria', 'cigua', 'cilia', 'citee', 'citua', 'cobia', 'cocoa', 'cogue', 'copei', 'coque', 'Coree', 'coroa', 'curie', 'curio', 'curua', 'cusie', 'cutie', 'Dalea', 'Damia', 'damie', 'Danai', 'danio', 'Darii', 'daroo', 'Delia', 'didie', 'Dixie', 'dixie', 'dogie', 'dolia', 'donee', 'Donia', 'doree', 'doria', 'dowie', 'dulia', 'Durio', 'dusio', 'ecoid', 'eerie', 'elain', 'Elean', 'Eleut', 'Elian', 'Elias', 'Eliot', 'Eloah', 'emeer', 'Eneas', 'eniac', 'enoil', 'equal', 'equid', 'equip', 'Equus', 'Erian', 'exeat', 'facia', 'Fedia', 'Ferae', 'feria', 'ferie', 'Ferio', 'Fidia', 'fifie', 'fikie', 'filao', 'fique', 'fogou', 'folia', 'folie', 'folio', 'fosie', 'fotui', 
'fovea', 'fugue', 'fusee', 'gagee', 'galea', 'galee', 'Galei', 'garoo', 'Gavia', 'gazee', 'genie', 'genii', 'Genoa', 'genua', 'Getae', 'Gilia', 'gilia', 'Gobia', 'Gobio', 'golee', 'goloe', 'gonia', 'gulae', 'hagia', 'Hakea', 'Haloa', 'helio', 'heloe', 'Hevea', 'hinau', 'holia', 'Howea', 'hutia', 'Idaic', 'ideal', 'Idean', 'idiom', 'idiot', 'ileac', 'ileon', 'ileum', 'ileus', 'Iliac', 'iliac', 'Iliad', 'ilial', 'Ilian', 'iliau', 'ilium', 'inial', 'inion', 'irian', 'Isaac', 'Iseum', 'Isiac', 'ivied', 'Ixion', 'jabia', 'jagua', 'Jamie', 'janua', 'jiboa', 'jiqui', 'jixie', 'joree', 'josie', 'Julia', 'Julie', 'julio', 'juvia', 'kahau', 'kamao', 'kanae', 'kapai', 'karou', 'Katie', 'kazoo', 'Kenai', 'kibei', 'Kiwai', 'Kogia', 'kohua', 'kokio', 'kolea', 'Konia', 'Koroa', 'kukui', 'kunai', 'labia', 'lakie', 'lamia', 'Lanao', 'Laria', 'Layia', 'legoa', 'legua', 'lehua', 'Lelia', 'levee', 'Lewie', 'lexia', 'linea', 'linie', 'logia', 'logie', 'logoi', 'lokao', 'loxia', 'Lucia', 'Lutao', 'luteo', 'macao', 'Madia', 'mafoo', 'mahoe', 'mahua', 'Makua', 'maleo', 'Mamie', 'manei', 'mania', 'maniu', 'mapau', 'maqui', 'marae', 'Maria', 'maria', 'Marie', 'Mario', 'Masai', 'matai', 'Media', 'media', 'medio', 'melee', 'Melia', 'meloe', 'Mesua', 'mikie', 'mimeo', 'Mitua', 'Mocoa', 'Moqui', 'movie', 'mowie', 'mudee', 'Munia', 'musie', 'Nahua', 'nakoo', 'napoo', 'Nasua', 'nigua', 'nikau', 'nisei', 'nixie', 'Nogai', 'noria', 'norie', 'noyau', 'nubia', 'nuque', 'obeah', 'ocean', 'odeon', 'odeum', 'odium', 'odoom', 'ogeed', 'oleic', 'olein', 'onion', 'onium', 'opium', 'Oraon', 'oread', 'Oreas', 'Orias', 'oriel', 'Orion', 'osier', 'oukia', 'ourie', 'ovoid', 'Ozias', 'Palau', 'palea', 'Papio', 'Paque', 'parao', 'patao', 'patio', 'Pavia', 'payee', 'pedee', 'pekoe', 'pewee', 'Picae', 'Picea', 'pilau', 'Pilea', 'pique', 'pitau', 'pixie', 'polio', 'Poria', 'potoo', 'poyou', 'puree', 'radii', 'radio', 'ramie', 'ratio', 'razee', 'razoo', 'rebia', 'redia', 'redue', 'regia', 'rehoe', 'reree', 'resee', 
'resue', 'retia', 'retie', 'revie', 'revue', 'rodeo', 'Rogue', 'rogue', 'rokee', 'roleo', 'Romeo', 'roque', 'Roxie', 'Rubia', 'rupee', 'rupia', 'rupie', 'Sabia', 'Sacae', 'Sadie', 'Sagai', 'sajou', 'Sakai', 'sanai', 'sepia', 'sequa', 'serai', 'serau', 'serio', 'Sesia', 'setae', 'Simia', 'Sinae', 'sitao', 'sitio', 'socii', 'sodio', 'Sofia', 'Solea', 'solea', 'Solio', 'solio', 'soree', 'Sosia', 'sotie', 'sulea', 'Supai', 'Susie', 'taboo', 'tafia', 'tagua', 'tahua', 'Takao', 'talao', 'tania', 'tanoa', 'tapia', 'tapoa', 'taqua', 'Tarai', 'tarau', 'tarea', 'tarie', 'tatie', 'tatou', 'tawie', 'tazia', 'tegua', 'Telei', 'tenai', 'tenio', 'tepee', 'tereu', 'tibia', 'Tigua', 'Tilia', 'tinea', 'Todea', 'togue', 'topee', 'topia', 'toque', 'torii', 'towai', 'tozee', 'tubae', 'tugui', 'tuque', 'turio', 'tutee', 'uloid', 'Umaua', 'umiak', 'Uniat', 'uniat', 'union', 'unoil', 'upeat', 'ureal', 'ureic', 'ureid', 'Uriah', 'urial', 'Urian', 'Uriel', 'usual', 'utees', 'uveal', 'uviol', 'vacoa', 'vague', 'vakia', 'value', 'venie', 'venue', 'Vicia', 'video', 'Vidua', 'vigia', 'vijao', 'vinea', 'vireo', 'visie', 'vogue', 'wahoo', 'Warua', 'Wayao', 'wekau', 'wifie', 'xenia', 'Yagua', 'yagua', 'Yahoo', 'yahoo', 'Yameo', 'Yaqui', 'Yazoo', 'yowie', 'Zamia', 'Zaque', 'zirai', 'Zizia', 'Zoque', 'zowie']
>>> newwords = [w for w in words if re.search('[aefghin]', w) and len(w) <= 7];
>>> newwords
['a', 'aa', 'aal', 'aalii', 'aam', 'Aani', 'Aaron', 'Aaronic', 'Aaru', 'aba', 'A
babdeh', 'Ababua', 'abac', 'abaca', 'abacate', 'abacay', 'abacist', 'aback', 'ab
actor', 'abacus', 'Abadite', 'abaff', 'abaft', 'abaiser', 'abalone', 'Abama', 'a
bandon', 'Abanic', 'Abantes', 'Abaris', 'abas', 'abase', 'abased', 'abaser', 'Ab
asgi', 'abash', 'abashed', 'abasia', 'abasic', 'abask', 'Abassin', 'abate', 'aba
ter', 'abatis', 'abaton', 'abator', 'Abatua', 'abature', 'abave', 'abaxial', 'ab
axile', 'abaze', 'abb', 'Abba', 'abbacy', 'abbas', 'abbasi', 'abbassi', 'abbess'
, 'abbey', 'Abbie', 'abbot', 'abbotcy', 'abdal', 'abdat', 'abdest', 'Abdiel', 'a
bdomen', 'abduce', 'abduct', 'Abe', 'abeam', 'abear', 'abed', 'abeigh', 'Abel',
'abele', 'Abelia', 'Abelian', 'Abelite', 'abelite', 'Aberia', 'abet', 'abettal',
'abettor', 'abey', 'abeyant', 'abfarad', 'abhenry', 'abhor', 'abidal', 'abide',
'abider', 'abidi', 'abiding', 'Abie', 'Abies', 'abietic', 'abietin', 'Abiezer',
'Abigail', 'abigail', 'abigeat', 'abigeus', 'abilao', 'ability', 'abilla', 'abi
lo', 'abiosis', 'abiotic', 'Abipon', 'abir', 'abiston', 'Abitibi', 'abiuret', 'a
bject', 'abjoint', 'abjudge', 'abjure', 'abjurer', 'abkar', 'abkari', 'Abkhas',
...
Zoarite', 'zoarium', 'zodiac', 'zoea', 'zoeal', 'zoeform', 'zoetic', 'zogan', 'zog
o', 'Zohak', 'zoic', 'zoid', 'Zoilean', 'Zoilism', 'Zoilist', 'zoisite', 'zoism'
, 'zoist', 'zoistic', 'Zolaism', 'Zolaist', 'Zolaize', 'zolle', 'zombi', 'zombie
', 'zonal', 'zonally', 'zonar', 'Zonaria', 'zonary', 'zonate', 'zonated', 'zone'
, 'zoned', 'zonelet', 'Zongora', 'zonic', 'zoning', 'zonite', 'Zonites', 'zoniti
d', 'zonoid', 'Zonta', 'Zontian', 'zonular', 'zonule', 'zonulet', 'zonure', 'zon
urid', 'Zonurus', 'zoocarp', 'zooecia', 'zoogamy', 'zoogene', 'zoogeny', 'zoogon
y', 'zooid', 'zooidal', 'zoolite', 'zoolith', 'zoology', 'zoon', 'zoonal', 'zoon
ic', 'zoonist', 'zoonite', 'zoonomy', 'zoons', 'zoonule', 'zoopery', 'zoopsia',
'zoosis', 'zootaxy', 'zooter', 'zootic', 'Zootoca', 'zootype', 'Zoque', 'Zoquean
', 'zorgite', 'zoril', 'zorilla', 'zorillo', 'Zosma', 'zoster', 'Zostera', 'Zoua
ve', 'zounds', 'zowie', 'Zoysia', 'zudda', 'zuisin', 'Zuleika', 'Zulinde', 'Zulu
ize', 'zumatic', 'Zuni', 'Zunian', 'zunyite', 'Zutugil', 'zuza', 'zwitter', 'zyg
a', 'Zygaena', 'zygal', 'zygion', 'zygite', 'Zygnema', 'zygoma', 'zygon', 'zygos
e', 'zygosis', 'zygote', 'zygotic', 'zygous', 'zymase', 'zyme', 'zymic', 'zymin'
, 'zymite', 'zymogen', 'zymoid', 'zymome', 'zymomin', 'zymosis', 'zymotic', 'zym
urgy', 'Zyrian', 'Zyryan', 'zythem', 'Zythia', 'zythum']
>>> newwords = [w for w in words if re.search('^[d-f][g-i][j-l][m-o]$', w)];
>>> newwords
['dilo', 'film', 'filo']
First, for full direction words:
/((?:north\s*|south\s*)*(?:west|east))|((north|south)(?!\s(north|south)))/
Then, for abbreviations:
/(?:\b(n|s)*(w|e)\b)/
Then, compacted into one:
/((?:north\s*|south\s*)*(?:west|east))|((north|south)(?!\s(north|south)))|(?:\b(n|s)*(w|e)\b)/
Using some code:
import re;
# Case-insensitive pattern with three alternatives, tried in this order:
#   1. any run of "north"/"south" (each optionally followed by whitespace)
#      ending in "west" or "east", e.g. "north west", "southeast";
#   2. a lone "north" or "south" that is not followed by whitespace and
#      another "north"/"south";
#   3. a whole-word abbreviation: zero or more "n"/"s" letters followed by
#      "w" or "e", e.g. "ne", "sw", "e".  A bare "n" or "s" does not match.
regex_direction = re.compile(
    r"((?:north\s*|south\s*)*(?:west|east))|((north|south)(?!\s(north|south)))|(?:\b(n|s)*(w|e)\b)",
    re.IGNORECASE,
)

# `raw_input` only exists on Python 2; fall back to `input` on Python 3 so
# the prompt loop reads a plain line of text on either interpreter.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def find_directions(sentence):
    """Return every direction phrase found in *sentence*, in order.

    Each item is the full text of one non-overlapping match of
    ``regex_direction``; an input with no matches yields an empty list.
    """
    return [match.group(0) for match in regex_direction.finditer(sentence)]


def main():
    """Prompt for movement commands and print the directions in each one.

    The loop ends when the user types ``exit`` or when standard input is
    closed (EOF), after which ``Finished!`` is printed.
    """
    while True:
        try:
            sentence = _read_line("\r\n[Enter a movement command]\r\n$ ")
        except EOFError:
            # Closed stdin is treated like "exit" rather than a traceback.
            break
        if sentence == "exit":
            break
        print(find_directions(sentence))
    print("Finished!")


# Only start the interactive loop when run as a script, so importing this
# file does not block waiting on stdin.
if __name__ == "__main__":
    main()