Skip to content
Snippets Groups Projects
Commit af82949a authored by Peter J. Keleher's avatar Peter J. Keleher
Browse files

auto

parent 63406640
No related branches found
No related tags found
No related merge requests found
import psycopg2
import os
import sys
import datetime
from collections import Counter
from types import *
import argparse
from queries import *
from answers import *
parser = argparse.ArgumentParser()
parser.add_argument('-v', '--verbose', help="Print out the query results and more details", required=False, action="store_true")
parser.add_argument('-i', '--interactive', help="Run queries one at a time, and wait for user to proceed", required=False, action="store_true")
parser.add_argument('-q', '--query', type = int, help="Only run and check the given query number", required=False)
args = parser.parse_args()
verbose = args.verbose
interactive = args.interactive
# Decide whether two result values are the same, or close enough to count as the same.
def match(x, y):
    """Return True when x and y are equal or nearly equal.

    Values of different concrete types never match. Floats match when they
    differ by less than 0.01, ints and bools must be exactly equal, and any
    other value is compared through its whitespace-stripped string form.
    """
    kind = type(x)
    if kind != type(y):
        return False
    if kind is float:
        # Tolerate small rounding differences between the two answers.
        return abs(x - y) < 0.01
    if kind is int or kind is bool:
        return x == y
    # Convert to string and compare, ignoring surrounding whitespace.
    left = str(x).strip()
    right = str(y).strip()
    return left == right
def compareAnswers(ans, correct):
    """Grade a query result against the expected result.

    Both arguments are sequences of rows, each row a sequence of values.
    Returns a (message, score) tuple with a score between 0 and 4:
    4 for an exact or near-exact match (or both empty), 3 or 2 when the
    bags of values are very similar, 1 when somewhat similar, 0 otherwise.
    """
    got_rows = len(ans)
    want_rows = len(correct)

    # Empty result sets are graded without looking at any values.
    if got_rows == 0:
        if want_rows == 0:
            return ("Score = 4: Both answers empty", 4)
        return ("Score = 0: Empty answer", 0)
    if want_rows == 0:
        return ("Score = 0: The answer should have been empty", 0)

    # A different column count (judged from the first rows) earns nothing.
    if len(ans[0]) != len(correct[0]):
        return ("Score = 0: Incorrect Number of Columns", 0)

    same_row_count = got_rows == want_rows

    # With equal row counts, compare the results cell by cell, in order.
    if same_row_count:
        cell_pairs = (
            (got, want)
            for got_row, want_row in zip(ans, correct)
            for got, want in zip(got_row, want_row)
        )
        if all(match(got, want) for got, want in cell_pairs):
            return ("Score = 4: Exact or Near-exact Match", 4)

    # Otherwise fall back to an approximate match on the bags of cell values.
    def bag_of_values(rows):
        return Counter(str(value).strip() for row in rows for value in row)

    flattened_correct = bag_of_values(correct)
    flattened_ans = bag_of_values(ans)
    overlap = sum((flattened_correct & flattened_ans).values())
    combined = sum((flattened_correct | flattened_ans).values())
    jaccard = overlap * 1.0 / combined

    if verbose:
        print("------ Creating word counts and comparing answers ---------")
        print(flattened_correct)
        print(flattened_ans)
        print("Jaccard Coefficient: {}".format(jaccard))

    if jaccard > 0.9:
        if same_row_count:
            return ("Score = 3: Very similar, but not an exact match (possibly wrong sort order)", 3)
        return ("Score = 2: Very similar, but incorrect number of rows", 2)
    if jaccard > 0.5:
        return ("Score = 1: Somewhat similar answers", 1)
    return ("Score = 0: Answers too different", 0)
# Run each submitted query against the flights database, grade it against the
# expected answer, and report the total score.
conn = psycopg2.connect("dbname=flights user=ubuntu")
cur = conn.cursor()
totalscore = 0
try:
    # Query 0 is not graded; queries 1 through 10 are.
    for i in range(1, 11):
        # If a query is specified by -q option, only do that one
        if args.query is not None and args.query != i:
            continue
        try:
            print("========== Executing Query {}".format(i))
            print(queries[i])
            cur.execute(queries[i])
            ans = cur.fetchall()
            if verbose:
                print("--------- Your Query Answer ---------")
                for t in ans:
                    print(t)
                print("--------- Correct Answer ---------")
                for t in correctanswers[i]:
                    print(t)
            # Compare with correctanswers[i]
            cmp_res = compareAnswers(ans, correctanswers[i])
            print("-----> " + cmp_res[0])
            totalscore += cmp_res[1]
            if interactive:
                input('Press enter to proceed')
                os.system('clear')
        except Exception:
            # Show the failure details, then abort the run: a failed query
            # stops grading rather than being silently scored.
            print(sys.exc_info())
            raise
finally:
    # Always release the database resources, even when a query fails.
    cur.close()
    conn.close()
print("-----------------> Total Score = {}".format(totalscore))
# Answer template: queries[i] holds the SQL text for question i (0 through 10).
# Every slot starts out empty and is filled in below.
queries = [""] * 11

### 0. List every airport code together with its city, ordered by city name (increasing).
### Output column order: airportid, city
queries[0] = """
select airportid, city
from airports
order by city;
"""

### 1. Find the names of the customers whose names are at least 15 characters long
### and whose second letter is "l".
### Order by name.
queries[1] = """
select 0;
"""

### 2. Find any customers who flew on their birthday.
### Hint: Use the "extract" function that operates on dates.
### Order output by customer name.
### Output columns: all columns from customers
queries[2] = """
select 0;
"""

### 3. Generate a list (source_city, source_airport_code, dest_city, dest_airport_code, number_of_flights)
### for all source-dest pairs with at least 3 flights.
### Order first by number_of_flights (decreasing), then source_city (increasing), then dest_city (increasing).
### Note: You must generate the source and destination cities along with the airport codes.
queries[3] = """
select 0;
"""

### 4. Find the name of the airline with the maximum number of customers registered as frequent fliers.
### Output only the name of the airline. If there are multiple answers, order by name.
queries[4] = """
select 0;
"""

### 5. For all flights from OAK to IAD, list the flight id, the airline name, and the
### duration in hours and minutes. The output has 4 fields: flightid, airline name,
### hours, minutes. Order by flightid.
queries[5] = """
select 0;
"""

### 6. Find empty flights (flight, flight date) on any date on which someone flew.
### Assume that if anyone flew on a given date, all flights took off as scheduled,
### with or without passengers. Order by flight id (increasing), then by date (increasing).
queries[6] = """
select 0;
"""

### 7. Generate a list of customers who don't list Southwest as their frequent flier airline,
### but actually flew the most (by number of flights) on that airline.
### Output columns: customerid, customer_name
### Order by: customerid
queries[7] = """
select 0;
"""

# fall17
### 8. Generate a list of customers where the interval between first and last flight is 5 days.
### Order by the customer name.
queries[8] = """
select 0;
"""

# fall17
### 9. Name of the customer whose maximum interval between any two consecutive flights is 4 days.
### The output should simply be a list of names.
### Order by the customer name.
queries[9] = """
select 0;
"""

### 10. Output a list (AirportID, Airport-rank), ranking the airports by the total number of
### flights that depart from that airport. The airport with the most departing flights gets
### rank 1, and so on. If two airports tie, they both get the same rank and the next rank
### is skipped.
### Order the output by rank (increasing), then by airport ID.
queries[10] = """
select 0;
"""
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment