diff --git a/README.md b/README.md
index a940a855e88602b90a5a04d3b7e7387034f9a7b2..30b508365e40a954f1ab2cf51f259e585f6d35e9 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@ The data in mongodb has the following fields:
 * ```year``` - Year of the data
 * ```min_temps``` - Minimum daily temperature as an array. One element is one day
 * ```max_temps``` - Maximum daily temperature as an array. One element is one day
+* ```normal``` - Whether the document holds a 30-year normal (1981-2010 average) rather than a single year of data
 
 Go to ```/docs``` to view the Swagger generated API docs
 #### API Endpoints
diff --git a/to_mongo.py b/to_mongo.py
index 9ad11e5077b1c8c8f23f998ba85a2721aeff695e..3cf477aaa38b7f404539701cd34019eb525690f3 100644
--- a/to_mongo.py
+++ b/to_mongo.py
@@ -4,6 +4,14 @@ import tqdm
 import datetime
 from pymongo import MongoClient
 
+
+def is_leap_year(year):
+    """Return True if ``year`` is a leap year in the Gregorian calendar."""
+    # Every 4th year is a leap year, except century years, which only
+    # count when they are also divisible by 400 (2000 yes, 1900 no).
+    return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
+
+
 client = MongoClient("mongodb+srv://gdd-server:u8i3icLAJXjZEhTs@cluster0.wdxf4.mongodb.net")
 
 db = client["gdd_database"]
@@ -80,10 +88,93 @@ for year in years:
         t["min_temps"] = list([float(a) for a in tmin_])
         t["max_temps"] = list([float(a) for a in tmax_])
         t["_id"] = _id
+        t["normal"] = False
 
         locs.append(t)
 
         count += 1
 
+### 30 YEAR NORMALS ###
+### Covers from 1981-2010 ###
+
+NORMAL_START_YEAR = 1981
+NORMAL_END_YEAR = 2010
+N_NORMAL_YEARS = NORMAL_END_YEAR - NORMAL_START_YEAR + 1
+FEB_29_INDEX = 59  # zero-based position of Feb 29 within a leap year
+
+# Flush documents still buffered by the per-year loop above: ``locs`` is
+# reset below, which would otherwise drop them without inserting them.
+if len(locs) != 0:
+    new_result = gdd.insert_many(locs)
+
+# Running sums of each year's share (daily value / number of years).
+# They must start at zero: a NaN start stays NaN after every "+=".
+# A grid cell that is NaN in any year's data ends up NaN in the normal.
+single_year_min = np.zeros((365, 621, 1405))
+single_year_max = np.zeros((365, 621, 1405))
+
+for year in range(NORMAL_START_YEAR, NORMAL_END_YEAR + 1):
+    print (year)
+    # Close each yearly file as soon as its arrays have been accumulated.
+    with xr.open_dataset("data/temps_%s.nc" % year) as data:
+        tmins = data.tmin.data
+        tmaxs = data.tmax.data
+
+        if is_leap_year(year):
+            # Drop Feb 29 so every year contributes exactly 365 days.
+            tmins = np.delete(tmins, FEB_29_INDEX, axis=0)
+            tmaxs = np.delete(tmaxs, FEB_29_INDEX, axis=0)
+
+        single_year_max += tmaxs / N_NORMAL_YEARS
+        single_year_min += tmins / N_NORMAL_YEARS
+
+# Grid cells whose averaged maximum is non-NaN on at least one day.
+x = np.where(~np.isnan(np.nanmean(single_year_max, axis=0)))
+x = list(zip(x[0], x[1]))
+
+# NOTE(review): ``lat`` and ``lon`` are left over from the per-year section
+# above; this assumes ``lat`` has not already been reversed -- confirm.
+lat = lat[::-1]
+
+tmins = single_year_min
+tmaxs = single_year_max
+
+locs = []
+
+count = 0
+for i in tqdm.tqdm(x):
+    # Insert in batches of 100 documents.
+    if len(locs) % 100 == 0 and len(locs) != 0:
+        new_result = gdd.insert_many(locs)
+        locs = []
+
+    tmin_ = tmins[:, i[0], i[1]]
+    tmax_ = tmaxs[:, i[0], i[1]]
+
+    lat_ = lat[i[0]]
+    lon_ = lon[i[1]]
+
+    t = {}
+
+    # Ids look like "2010_<row>_<col>_normal" (prefix is the last normal year).
+    _id = "%s_%s_%s_normal" % (NORMAL_END_YEAR, i[0], i[1])
+
+    t["location"] = {"type": "Point", "coordinates": [float(lon_), float(lat_)]}
+    t["prism_lat"] = int(i[0])
+    t["prism_lon"] = int(i[1])
+
+    # Placeholders: normals store 0 for both the last date and the year.
+    t["last_date"] = 0
+    t["year"] = 0
+    t["min_temps"] = [float(v) for v in tmin_]
+    t["max_temps"] = [float(v) for v in tmax_]
+    t["_id"] = _id
+
+    t["normal"] = True
+
+    locs.append(t)
+
+    count += 1
+
 if len(locs) != 0:
-    new_result = gdd.insert_many(locs)
\ No newline at end of file
+    new_result = gdd.insert_many(locs)