diff --git a/.vagrant/machines/default/virtualbox/action_provision b/.vagrant/machines/default/virtualbox/action_provision deleted file mode 100644 index 8b5a88337253b1871da113f3cd9cf45ef0af3b99..0000000000000000000000000000000000000000 --- a/.vagrant/machines/default/virtualbox/action_provision +++ /dev/null @@ -1 +0,0 @@ -1.5:6998c206-acab-4184-9a51-16d605335ecc \ No newline at end of file diff --git a/.vagrant/machines/default/virtualbox/action_set_name b/.vagrant/machines/default/virtualbox/action_set_name index 033ba6c3c40b3c3ea45837c8c2c9c85c60c301a6..432cfee26f25e67acb5e17a3b52bb207543ade46 100644 --- a/.vagrant/machines/default/virtualbox/action_set_name +++ b/.vagrant/machines/default/virtualbox/action_set_name @@ -1 +1 @@ -1511961833 \ No newline at end of file +1512588608 \ No newline at end of file diff --git a/.vagrant/machines/default/virtualbox/id b/.vagrant/machines/default/virtualbox/id index 1f54117b0bf98bf5d07d64e34f46524edacff6ba..b8c81456951c13b380eb1d945480cf1349bfa4c3 100644 --- a/.vagrant/machines/default/virtualbox/id +++ b/.vagrant/machines/default/virtualbox/id @@ -1 +1 @@ -6998c206-acab-4184-9a51-16d605335ecc \ No newline at end of file +6f1ff933-df72-426f-8a59-d93bb147f859 \ No newline at end of file diff --git a/.vagrant/machines/default/virtualbox/index_uuid b/.vagrant/machines/default/virtualbox/index_uuid index d44e6a3159b9b4841a344cc0f607bd596126ad7e..06bc28c154f86d2b68c6eb8a58558031475519ff 100644 --- a/.vagrant/machines/default/virtualbox/index_uuid +++ b/.vagrant/machines/default/virtualbox/index_uuid @@ -1 +1 @@ -3662673237874f709a578940f16436e0 \ No newline at end of file +a3425623643a45f4ada456985e924f92 \ No newline at end of file diff --git a/.vagrant/machines/default/virtualbox/private_key b/.vagrant/machines/default/virtualbox/private_key deleted file mode 100644 index 9ea0b6041504748adce04fecb8a2c52284c632c1..0000000000000000000000000000000000000000 --- a/.vagrant/machines/default/virtualbox/private_key +++ /dev/null @@ -1,27 +0,0 @@ ------BEGIN RSA PRIVATE KEY----- -MIIEowIBAAKCAQEAwaG91+7c8ms7hNPsgQC3u3tCLO3UkEVKxSlHYl379j1EBoH8 -Pqnppd2sA1vZjIsfZplPT6akR1W+UGgs2ov61gvJocOKk8R/nB0ygb+lxi5DP0Ae -Abs2ivhkRydB8lX9begebfsOkRpPutffkBdmQgSv112XJ1in5sYCzFPdTzYGQVK4 -+XiQ97IJOZSK2YTz1qpShMCwYjscZElN7OwbozUGX6ShdqcAn+AkVnvOs4K5876M -9QZ0i4Vm/hEow/EjlA2y1BznQfqRDIyFKPCkonczZu4lDpJwvYrhzFHt4CoswJUl -JWMnG4xO7IWOLy7usp2VYzLu0lV795Cn8UMrcwIDAQABAoIBABHUphnDWj9Mlp+F -sGq0uTafhBmFESTz7I25XmR1iSVEwQXkZGkU1no6roNkquThTtPw3DfWfZxADhVz -cmXxq/iX/0pRKmqvFp02pxcBoe9dBT5G1jyMk9o6Q2m1+MGrH3pGfL7d9ppwFNV8 -2Y54SSddLSAk8y1hYZzDsTBFrMSFaO+U24oRTEb6M1u36yu2xCfPywiPTo5arlgl -1kocTuut32CyAf6oMnpOmJHkmMgENNyoHK9msALbb7NjWNIN7BJkKIyCwzTrOAwV -DM1zLFCaOt1XbmTr2h0mjCpFSakyd2qAv1FCRx5CQihgEB+Q85AiGsu1S0JsAbAy -8tQ0WbkCgYEA8Hzqewo1BIsEla6xkQm4r0X0W/ARquosHoFhNc8en7W9wbxIgeJK -fT08JS7OQeXjtO3NOI1OKqRi+vLVC+SWoad4z5WlbRpEE1vuBLuomw8ienoehU3o -Afhh6AfTe+bFNIDjtnhI2NcCTLGV1UBSv/PQ9uJIzKu0LbIn07EHTU0CgYEAzh8b -tmiMHPpArJmNSy/3CC2fJfK2cJeGCGnge4yeZb4tXxVopnHIFSimgTELplBF3A1T -IklKG8UjPyv6/YzREFrG5wiJ5isA5U6rRlVxXRvFPaFrZgwpWXnAx9kMTEkNY/PX -MWwtFNoNBiX40kXmsnQSGtednEmzEyFIobke+78CgYEAsmOW8+ogZdxRbE8HvJS/ -gJSdrM0p7eSDEaXbXDTlfIct2VY3WtFne/s+cD6GFW/YN0/m2TpIb209VvVmf8ny -yrsV7LYO3h2LveiFaNFjQpI5eyy3Ex81KLD8v0d+4vvN8SJix+n175OeSxG4GVX7 -jBPqQBie8+OyZesvi+JXjkkCgYAO3FVOKGl5qV1CVGvIcPPZhhiETuRyZ6pFLwT9 -Q10kLrrssQlzfp6TT/P5OoUBf6YZwGgu/6O1LthktJIK970W9cSDYa03iAAviCV3 -E6/xq4/j6qBoohXVGp8YiYCGe3nYq/rm2ErDxxjLSa+P0XRZh17KyqLVYsbjaTHo -XwVp1wKBgG+wP1EUtM6jKjsASI04HLiMI1xr+JmU0pIE6uT6zo+CZ48fTEK/Oetz -QiouIfdI76eGC5Pc+miiiH8Na/M8qZmMrjmoNdFrgQUKUKzggkCpemyxJDm1AqBp -3AIPGf89I8tl2pPlcfVF3ZS/ZWYX3Ol4YxzBYQb1bIMSg5hgmalR ------END RSA PRIVATE KEY----- diff --git a/.vagrant/machines/default/virtualbox/synced_folders b/.vagrant/machines/default/virtualbox/synced_folders deleted file mode 100644 index 9f1b58f434e38b2adfb523d939a1b64efbd0250b..0000000000000000000000000000000000000000 --- a/.vagrant/machines/default/virtualbox/synced_folders +++ /dev/null @@ -1 +0,0 @@ -{"virtualbox":{"/vagrant":{"guestpath":"/vagrant","hostpath":"/Users/keleher/dfs/p6","disabled":false,"__vagrantfile":true}}} \ No newline at end of file diff --git a/grading.py b/grading.py index 449db60fae412afd0139f293c6d6b8e806b9ed29..bf5aec378fd283372c60e69fa9d558a58a9a7ef5 100644 --- a/grading.py +++ b/grading.py @@ -9,7 +9,7 @@ setDefaultAnswer(sc.parallelize([0])) taskno = int(sys.argv[1]) if taskno == 1 or taskno == -1: - playRDD = sc.textFile("bigdatafiles/play.txt") + playRDD = sc.textFile("datafiles/play.txt") ### Task 1 print "=========================== Task 1" task1_result = task1(playRDD) @@ -19,7 +19,7 @@ if taskno == 1 or taskno == -1: if taskno == 2 or taskno == -1: ### Task 2 print "=========================== Task 2" - nobelRDD = sc.textFile("bigdatafiles/prize.json") + nobelRDD = sc.textFile("datafiles/prize.json") task2_result = nobelRDD.map(json.loads).flatMap(task2_flatmap).distinct() for x in task2_result.takeOrdered(100): print x.encode('utf-8') @@ -27,14 +27,14 @@ if taskno == 2 or taskno == -1: if taskno == 3 or taskno == -1: #### Task 3 print "=========================== Task 3" - nobelRDD = sc.textFile("bigdatafiles/prize.json") + nobelRDD = sc.textFile("datafiles/prize.json") task3_result = task3(nobelRDD) for x in task3_result.takeOrdered(100): print x if taskno == 4 or taskno == -1: #### Task 4 - logsRDD = sc.textFile("bigdatafiles/NASA_logs_sample.txt") + logsRDD = sc.textFile("datafiles/NASA_logs_sample.txt") print "=========================== Task 4" task4_result = task4(logsRDD, ['02/Jul/1995', '03/Jul/1995', '04/Jul/1995', '05/Jul/1995', '06/Jul/1995']) for x in task4_result.takeOrdered(100): @@ -43,7 +43,7 @@ if taskno == 4 or taskno == -1: if taskno == 5 or taskno == -1: #### Task 5 print "=========================== Task 5" - amazonInputRDD = sc.textFile("bigdatafiles/amazon-ratings.txt") + amazonInputRDD = sc.textFile("datafiles/amazon-ratings.txt") amazonBipartiteRDD = amazonInputRDD.map(lambda x: x.split(" ")).map(lambda x: (x[0], x[1])).distinct() task5_result = task5(amazonBipartiteRDD) for x in task5_result.takeOrdered(100): @@ -52,7 +52,7 @@ if taskno == 5 or taskno == -1: if taskno == 6 or taskno == -1: #### Task 6 print "=========================== Task 6" - logsRDD = sc.textFile("bigdatafiles/NASA_logs_sample.txt") + logsRDD = sc.textFile("datafiles/NASA_logs_sample.txt") task6_result = task6(logsRDD, '03/Jul/1995', '05/Jul/1995') for x in task6_result.takeOrdered(100): print x @@ -60,14 +60,14 @@ if taskno == 6 or taskno == -1: if taskno == 7 or taskno == -1: #### Task 7 print "=========================== Task 7" - nobelRDD = sc.textFile("bigdatafiles/prize.json") + nobelRDD = sc.textFile("datafiles/prize.json") task7_result = task7(nobelRDD) for x in task7_result.takeOrdered(100): print x if taskno == 8 or taskno == -1: #### Task 8 -- we will start with a non-empty currentMatching and do a few iterations - amazonInputRDD = sc.textFile("bigdatafiles/amazon-ratings.txt") + amazonInputRDD = sc.textFile("datafiles/amazon-ratings.txt") amazonBipartiteRDD = amazonInputRDD.map(lambda x: x.split(" ")).map(lambda x: (x[0], x[1])).distinct() print "=========================== Task 8" currentMatching = sc.parallelize([('user1', 'product8')])