Question

In: Computer Science

Here is the script for finding the start dates of employees #!/usr/bin/env python3 import csv import...

Here is the script for finding the start dates of employees

#!/usr/bin/env python3


import csv
import datetime
import requests


FILE_URL = "https://storage.googleapis.com/gwg-hol-assets/gic215/employees-with$

def get_start_date():
"""Interactively get the start date to query for."""

print()
print('Getting the first start date to query for.')
print()
print('The date must be greater than Jan 1st, 2018')
year = int(input('Enter a value for the year: '))
month = int(input('Enter a value for the month: '))
day = int(input('Enter a value for the day: '))
print()

return datetime.datetime(year, month, day)

def get_file_lines(url):
"""Returns the lines contained in the file at the given URL"""

# Download the file over the internet
response = requests.get(url, stream=True)
lines = []

for line in response.iter_lines():
lines.append(line.decode("UTF-8"))
return lines

def get_same_or_newer(start_date):
"""Returns the employees that started on the given date, or the closest one$
data = get_file_lines(FILE_URL)
reader = csv.reader(data[1:])

# We want all employees that started at the same date or the closest newer
# date. To calculate that, we go through all the data and find the
# employees that started on the smallest date that's equal or bigger than
# the given start date.
min_date = datetime.datetime.today()
min_date_employees = []
for row in reader:
row_date = datetime.datetime.strptime(row[3], '%Y-%m-%d')

   # If this date is smaller than the one we're looking for,
# we skip this row
if row_date < start_date:
continue

# If this date is smaller than the current minimum,
# we pick it as the new minimum, resetting the list of
# employees at the minimal date.
if row_date < min_date:
min_date = row_date
min_date_employees = []

# If this date is the same as the current minimum,
# we add the employee in this row to the list of
# employees at the minimal date.
if row_date == min_date:
min_date_employees.append("{} {}".format(row[0], row[1]))

return min_date, min_date_employees

def list_newer(start_date):
while start_date < datetime.datetime.today():
start_date, employees = get_same_or_newer(start_date)
print("Started on {}: {}".format(start_date.strftime("%b %d, %Y"), empl$

# Now move the date to the next one
start_date = start_date + datetime.timedelta(days=1)

def main():
start_date = get_start_date()
list_newer(start_date)

if __name__ == "__main__":
main()

After running the current code, it takes about 2 minutes to output the start dates. The function 'get_same_or_newer()' must be modified to improve performance. First you must download the file only once from the URL, then pre-process the calculation by creating a dictionary with start_dates OR sorting the data by start_dates then go date by date

Solutions

Expert Solution

Working code implemented in Python and appropriate comments provided for better understanding.

Source Code:

#!/usr/bin/env python3


import csv
import datetime
import requests


FILE_URL = "https://storage.googleapis.com/gwg-hol-assets/gic215/employees-with-date.csv"

def get_start_date():
"""Interactively get the start date to query for."""

print()
print('Getting the first start date to query for.')
print()
print('The date must be greater than Jan 1st, 2018')
year = int(input('Enter a value for the year: '))
month = int(input('Enter a value for the month: '))
day = int(input('Enter a value for the day: '))
print()

return datetime.datetime(year, month, day)

def get_file_lines(url):
"""Returns the lines contained in the file at the given URL"""

# Download the file over the internet
response = requests.get(url, stream=True)
lines = []

for line in response.iter_lines():
lines.append(line.decode("UTF-8"))
return lines

def get_date_employees_dictionary(url):
data = get_file_lines(url)
reader = csv.reader(data[1:])
date_employees_dic = {}
for row in reader:
row_date = row[3]
if row_date in date_employees_dic.keys():
date_employees_dic[row_date].append("{} {}".format(row[0], row[1]))
else:
date_employees_dic[row_date] = ["{} {}".format(row[0],row[1])]
return date_employees_dic


def get_same_or_newer(start_date, url):
"""Returns the employees that started on the given date, or the closest one."""


# We want all employees that started at the same date or the closest newer
# date. To calculate that, we go through all the data and find the
# employees that started on the smallest date that's equal or bigger than
# the given start date.
min_date = datetime.datetime.today()
min_date_employees = []
date_employees_dic = get_date_employees_dictionary(url)

# date_employees_dic[row_date] = row[4]
# If this date is smaller than the one we're looking for,
# we skip this row
for row_date_str, employee in date_employees_dic.items():
row_date = datetime.datetime.strptime(row_date_str, "%Y-%m-%d")
if row_date < start_date:
continue

# If this date is smaller than the current minimum,
# we pick it as the new minimum, resetting the list of
# employees at the minimal date.
if row_date < min_date:
min_date = row_date
min_date_employees = []

# If this date is the same as the current minimum,
# we add the employee in this row to the list of
# employees at the minimal date.
if row_date == min_date:
min_date_employees.append("{}".format(date_employees_dic[row_date_str]))

return min_date, min_date_employees

def list_newer(start_date):
start_date, employees = get_same_or_newer(start_date,FILE_URL)
date_employees_dic = get_date_employees_dictionary(FILE_URL)
if start_date < datetime.datetime.today():
for date_str, employee in date_employees_dic.items():
date = datetime.datetime.strptime(date_str, "%Y-%m-%d")
if date > start_date:
print("Started on {}: {}".format(date.strftime("%b %d, %Y"), employee))

# Now move the date to the next one
# start_date = start_date + datetime.timedelta(days=1)

def main():
start_date = get_start_date()
list_newer(start_date)


# def main():
# print(get_date_employees_dictionary(FILE_URL))

# def main():
# start_date = get_start_date()
# min_date, min_date_employees = get_same_or_newer(start_date)
# print("Minimum date is: {}, the min_date_employees are:{}".format(min_date,min_date_employees))
if __name__ == "__main__":
main()

Code Screenshots:


Related Solutions

Python I am creating a class in python. Here is my code below: import csv import...
Python I am creating a class in python. Here is my code below: import csv import json population = list() with open('PopChange.csv', 'r') as p: reader = csv.reader(p) next(reader) for line in reader: population.append(obj.POP(line)) population.append(obj.POP(line)) class POP: """ Extract the data """ def __init__(self, line): self.data = line # get elements self.id = self.data[0].strip() self.geography = self.data[1].strip() self.targetGeoId = self.data[2].strip() self.targetGeoId2 = self.data[3].strip() self.popApr1 = self.data[4].strip() self.popJul1 = self.data[5].strip() self.changePop = self.data[6].strip() The problem is, I get an error saying:  ...
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT