In: Computer Science
Using Python
The logic is built to examine and process the incoming data for specific items of information. This may need to be done in a specific order with multiple processing steps.
You are to run your script on this test data file. Take screenshots of your interactions with the user for your submission document. Then place your analysis file, your Python code file, and your submission document into a single zip file.
A word of advice: since you have the test data file, you can do these calculations by hand and check them against your analysis file to see whether your program is working correctly.
ANSWER :--
GIVEN THAT :--
SCREEN SHOTS :--
CODE :--
import re
from collections import Counter
def read_in_file(fname):
    """Read a text file and return its lines.

    Args:
        fname: Path of the file to read.

    Returns:
        A list with one string per line of the file, each keeping its
        line terminator (an empty file yields an empty list), or the
        sentinel ``-1`` when the file cannot be opened. The caller
        compares against ``-1`` to decide whether to prompt again.
    """
    try:
        # The context manager closes the file even if reading fails midway.
        with open(fname, 'r') as f:
            return f.readlines()
    except OSError:
        # Missing file, directory, permission problem, etc. Only I/O
        # failures are translated into the sentinel, so unrelated errors
        # (e.g. KeyboardInterrupt) are no longer silently swallowed.
        return -1
def main():
    """Prompt for a text file, analyse it, and report the results.

    The user is asked for a file name until one can be read. The report
    contains the number of words, characters and lines, every unique word
    with its frequency, every two-word pair that occurs more than once,
    and summary word statistics. It is written to a file named
    ``Analysis-<input file name>`` placed next to the input file; the
    counts, repeated pairs and word statistics are also printed to the
    screen (the unique-word list goes to the file only).
    """
    import os  # local import: only needed to build the output path

    # Keep asking until read_in_file stops returning its -1 failure sentinel.
    while True:
        fname = input("Enter the name of the file ==> ")
        data = read_in_file(fname)
        if data == -1:
            print("Could not find the file specified. " + fname + " not found")
        else:
            break

    line_no = len(data)  # no. of lines = length of list
    words_no = 0
    char_no = 0
    data_nopunc = []  # every word of the file with punctuation removed
    word_pairs = []   # "first,second" for each adjacent purely alphabetic pair
    non_word = re.compile(r'[\W_]+')  # runs of non-word characters and '_'

    for line in data:
        stripped = line.strip()  # drops leading and trailing whitespace
        char_no += len(stripped)  # characters counted on the stripped line
        tokens = stripped.split()
        # Two consecutive tokens form a word pair only when both are purely
        # alphabetic, so tokens carrying punctuation or digits are skipped.
        for first, second in zip(tokens, tokens[1:]):
            if first.isalpha() and second.isalpha():
                word_pairs.append(first + "," + second)
        # Replace everything that is not a word character with a space and
        # re-split, which leaves the punctuation-free words of this line.
        words = non_word.sub(' ', ' '.join(tokens)).split()
        data_nopunc.extend(words)
        words_no += len(words)

    # A Counter maps each element to the number of times it occurs.
    word_counts = Counter(data_nopunc)
    pair_counts = Counter(word_pairs)

    unique_no = len(word_counts)
    unique_letter_no = sum(len(word) for word in word_counts)
    letter_no = sum(len(word) for word in data_nopunc)
    # An empty (or whitespace-only) file has no words; report 0.0 instead of
    # raising ZeroDivisionError.
    avg_word_len = letter_no / words_no if words_no else 0.0
    avg_unique_len = unique_letter_no / unique_no if unique_no else 0.0

    # Only pairs that appear more than once are reported.
    repeated_pairs = [
        pair + " (" + str(times) + ")"
        for pair, times in sorted(pair_counts.items())
        if times > 1
    ]
    cnt = len(repeated_pairs)

    stats = [
        "Total no. of words : " + str(words_no),
        "Average length of a word : " + str(avg_word_len),
        "Total no. of unique words : " + str(unique_no),
        "Average length of unique words : " + str(avg_unique_len),
        "No. of repeated two word pairs : " + str(cnt),
    ]

    # Prefix only the file name, not the whole path, so an input such as
    # "data/test.txt" is written to "data/Analysis-test.txt".
    directory, base_name = os.path.split(fname)
    out_path = os.path.join(directory, 'Analysis-' + base_name)

    # The context manager guarantees the report is flushed and closed.
    with open(out_path, "w") as f:
        f.write("No.of words : " + str(words_no) + "\n")
        f.write("No.of chars : " + str(char_no) + "\n")
        f.write("No. of lines : " + str(line_no) + "\n\n")
        f.write("Unique words and their frequencies:-\n")
        for word in sorted(word_counts):
            f.write(word + " (" + str(word_counts[word]) + ")\n")
        f.write("\nRepeated two word pairs and their frequencies:-\n")
        for entry in repeated_pairs:
            f.write(entry + "\n")
        f.write("\nWord statistics:-\n")
        f.write("\n".join(stats))

    print("No.of words :", words_no)
    print("No.of chars :", char_no)
    print("No. of lines :", line_no)
    print()
    print("Repeated two word pairs and their frequencies:-")
    for entry in repeated_pairs:
        print(entry)
    print("\nWord statistics:-")
    for stat in stats:
        print(stat)
# Run the interactive analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
PLEASE GIVE LIKE
*************THANKYOU****************