In: Computer Science
Using Python:
What needs to be added or changed?
Problem 1:
The program has three functions in it.
I want you to complete the count_how_many_words() function, and then call it (multiple times) inside main() to find out how many times Poe used the word “Raven” (or “raven”) and how many times he used “Nevermore” (or “nevermore”) inside the poem “The Raven.” You may not use list.count().
Don’t add any global variables or constants (besides the one I’ve declared, which could be moved into main() but would be even uglier there).
Example output (with incorrect numbers):
The word "Raven" (or "raven") appears 42 times in Edgar Allen Poe's "The Raven."
The word "Nevermore" (or "nevermore") appears 48 times in Edgar Allen Poe's "The Raven."
# this is what quick-and-dirty data cleaning looks like, friends
def break_into_list_of_words(string):
    """Take a long string and return a list of all of the words in it.

    The text is split into lines, each line is split on em dashes and then
    on single spaces, empty pieces are dropped, and a fixed set of
    punctuation marks is stripped from both ends of every remaining word.

    Args:
        string: the full text to break up, with lines separated by newlines.

    Returns:
        A list of the words in their original order. Some words still carry
        punctuation (apostrophes, hyphens, or a mark that was shielded by a
        quotation mark, e.g. 'Nevermore.'), and a word made up only of
        stripped punctuation comes back as ''.
    """
    # vvv YOU DO NOT HAVE TO CHANGE ANYTHING IN HERE vvv
    list_of_words = []
    # break by newlines to get a list of lines, skipping the empty ones
    for line in string.split('\n'):
        if line == '':
            continue
        # we have a few words run together with dashes
        # this breaks the line up by dashes (non-ideal, but eh)
        for a_line in line.split('—'):
            # now split each piece again, but this time on spaces
            list_of_words.extend(a_line.split(' '))
    # if blank spaces crept in (they did), let's get rid of them
    list_of_words = [word for word in list_of_words if word not in ('', ' ')]
    # removing a lot of unnecessary punctuation; gives you more options
    # for how to solve this problem
    # The marks are stripped one at a time, in this exact order, so a mark
    # hidden behind a later one (the '.' in '“Nevermore.”') is left in place.
    # smart quotes will ruin your LIFE
    for mark in (";", "?", ".", ",", "!", "“", "”", "’", "‘"):
        list_of_words = [word.strip(mark) for word in list_of_words]
    # all we have now is a list with words without punctuation
    # (secretly, some words still have apostrophes and dashes in 'em)
    # (but we don't care)
    return list_of_words
    # ^^^ YOU DO NOT HAVE TO CHANGE ANYTHING IN HERE ^^^
# this is the function you'll add a lot of logic to
def count_how_many_words(word_list, counting_string):
    """Return the number of times counting_string occurs in word_list.

    Exercise stub: the counting logic has not been written yet, so this
    currently returns None no matter what is passed in.

    Args:
        word_list: list of words to search.
        counting_string: the word to count.
    """
    return None  # this is just here so the program still compiles
def main():
    """Break the poem into words; counting and printing are still to be added.

    NOTE(review): THE_RAVEN is not defined in the code shown here; the
    assignment text says it is a constant declared elsewhere in the file.
    """
    count = 0
    words = break_into_list_of_words(THE_RAVEN)
    # a reasonable first step, to see what you've got:
    # for word in words:
    #     print(word, end = " ")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Here is the Python code. I read the poem in from a file called raven.txt; change the file name if you need to. This was necessary because THE_RAVEN is otherwise undefined.
# this is what quick-and-dirty data cleaning looks like, friends
def break_into_list_of_words(string):
    """Take a long string and return a list of all of the words in it.

    The text is split into lines, each line is split on em dashes and then
    on single spaces, empty pieces are dropped, and a fixed set of
    punctuation marks is stripped from both ends of every remaining word.

    Args:
        string: the full text to break up, with lines separated by newlines.

    Returns:
        A list of the words in their original order. Some words still carry
        punctuation (apostrophes, hyphens, or a mark that was shielded by a
        quotation mark, e.g. 'Nevermore.'), and a word made up only of
        stripped punctuation comes back as ''.
    """
    # vvv YOU DO NOT HAVE TO CHANGE ANYTHING IN HERE vvv
    list_of_words = []
    # break by newlines to get a list of lines, skipping the empty ones
    for line in string.split('\n'):
        if line == '':
            continue
        # we have a few words run together with dashes
        # this breaks the line up by dashes (non-ideal, but eh)
        for a_line in line.split('—'):
            # now split each piece again, but this time on spaces
            list_of_words.extend(a_line.split(' '))
    # if blank spaces crept in (they did), let's get rid of them
    list_of_words = [word for word in list_of_words if word not in ('', ' ')]
    # removing a lot of unnecessary punctuation; gives you more options
    # for how to solve this problem
    # The marks are stripped one at a time, in this exact order, so a mark
    # hidden behind a later one (the '.' in '“Nevermore.”') is left in place.
    # smart quotes will ruin your LIFE
    for mark in (";", "?", ".", ",", "!", "“", "”", "’", "‘"):
        list_of_words = [word.strip(mark) for word in list_of_words]
    # all we have now is a list with words without punctuation
    # (secretly, some words still have apostrophes and dashes in 'em)
    # (but we don't care)
    return list_of_words
    # ^^^ YOU DO NOT HAVE TO CHANGE ANYTHING IN HERE ^^^
# this is the function you'll add a lot of logic to
def count_how_many_words(word_list, counting_string):
    """Return how many words in word_list match counting_string.

    Matching ignores case and any punctuation left on either end of a word,
    so "Raven", "raven," and "Raven!" all count as "raven". Only whole words
    match: "Ravens" is not counted as "Raven".

    Args:
        word_list: list of word strings to search.
        counting_string: the word to count; its case does not matter.

    Returns:
        The number of matching words as an int (0 for an empty list).
    """
    # Words can still have punctuation stuck to their ends (for example
    # 'Nevermore.' or 'nevermore’.'), so trim every mark that may remain
    # before comparing.
    trim_chars = " .,;:?!'\"-“”‘’"
    target = counting_string.lower()
    return sum(
        1 for word in word_list if word.strip(trim_chars).lower() == target
    )
def main():
    """Report how often "Raven" and "Nevermore" appear in the poem.

    Reads the poem from raven.txt in the current working directory, breaks
    it into words, and prints one line per word with its case-insensitive
    count.

    Raises:
        OSError: if raven.txt cannot be opened.
    """
    # NOTE(review): assumes raven.txt is saved as UTF-8 (the poem contains
    # smart quotes and em dashes) -- change the encoding if yours differs.
    with open("raven.txt", encoding="utf-8") as poem_file:
        the_raven = poem_file.read()
    words = break_into_list_of_words(the_raven)

    # Single-quoted literals so the double quotes are really printed; the
    # doubled "" in a double-quoted literal is just string concatenation
    # and silently drops the quote characters from the output.
    template = (
        'The word "{0}" (or "{1}") appears {2} times in '
        'Edgar Allen Poe\'s "The Raven."'
    )
    for word in ("Raven", "Nevermore"):
        occurrences = count_how_many_words(words, word)
        print(template.format(word, word.lower(), occurrences))
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()