From 2d56c61856203dfb25feb3e3bdbc199c86559ee7 Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 20 Sep 2024 17:49:31 +0100 Subject: [PATCH] New mongodb backend --- backend/app.py | 210 ------------------------ backend/config/database.py | 5 + backend/main.py | 15 ++ backend/models/mongodb_handler.py | 12 ++ backend/models/pipelines.py | 5 + backend/routes/route.py | 21 +++ backend/schema/schemas.py | 19 +++ content/data-lab/protected-mangroves.md | 2 +- hugo.toml | 2 +- layouts/shortcodes/table.html | 6 +- shell.nix | 14 +- 11 files changed, 88 insertions(+), 223 deletions(-) delete mode 100644 backend/app.py create mode 100644 backend/config/database.py create mode 100644 backend/main.py create mode 100644 backend/models/mongodb_handler.py create mode 100644 backend/models/pipelines.py create mode 100644 backend/routes/route.py create mode 100644 backend/schema/schemas.py diff --git a/backend/app.py b/backend/app.py deleted file mode 100644 index 4faf5d1..0000000 --- a/backend/app.py +++ /dev/null @@ -1,210 +0,0 @@ -from flask import Flask, g, jsonify, request, json, Response, send_from_directory, abort -from flask_cors import CORS -import orjson, os -import pandas as pd - -import datetime -import time - -app = Flask(__name__) -CORS(app) - -FILES_DIRECTORY = "../data/" - - -@app.before_request -def start_timer(): - g.start = time.time() - - -@app.after_request -def log(response): - now = time.time() - duration = round(now - g.start, 4) - dt = datetime.datetime.fromtimestamp(now).strftime("%Y-%m-%d %H:%M:%S") - - log_entry = { - "timestamp": dt, - "duration": duration, - "method": request.method, - "url": request.url, - "status": response.status_code, - "remote_addr": request.access_route[-1], - "user_agent": request.user_agent.string, - } - - log_line = ",".join(f"{key}={value}" for key, value in log_entry.items()) - - with open("api_logs.txt", "a") as f: - f.write(log_line + "\n") - - return response - - -@app.route("/bitcoin_business_growth_by_country", methods=["GET"]) -def business_growth(): - - today = datetime.datetime.today() - - # Parse args from request - latest_date = request.args.get("latest_date") - country_names = request.args.get("countries") - cumulative_period_type = request.args.get("cumulative_period_type") - - # Open json locally - with open("../data/final__bitcoin_business_growth_by_country.json", "rb") as f: - data = orjson.loads(f.read()) - - # Filter based on args - if latest_date: - latest_date_bool = latest_date == "true" - filtered_data = [ - item for item in data if item["latest_date"] == latest_date_bool - ] - else: - filtered_data = data - - if country_names: - countries = [name.strip() for name in country_names.split(",")] - filtered_data = [ - item for item in filtered_data if item["country_name"] in countries - ] - - if cumulative_period_type == "1 day": - delta = today - datetime.timedelta(days=2) - filtered_data = [ - item - for item in filtered_data - if item["cumulative_period_type"] == cumulative_period_type - and delta <= datetime.datetime.strptime(item["date"], "%Y-%m-%d") - ] - elif cumulative_period_type == "7 day": - delta = today - datetime.timedelta(days=8) - filtered_data = [ - item - for item in filtered_data - if item["cumulative_period_type"] == cumulative_period_type - and delta <= datetime.datetime.strptime(item["date"], "%Y-%m-%d") - ] - elif cumulative_period_type == "28 day": - delta = today - datetime.timedelta(days=29) - filtered_data = [ - item - for item in filtered_data - if item["cumulative_period_type"] == cumulative_period_type - and delta <= datetime.datetime.strptime(item["date"], "%Y-%m-%d") - ] - elif cumulative_period_type == "365 day": - delta = today - datetime.timedelta(days=366) - filtered_data = [ - item - for item in filtered_data - if item["cumulative_period_type"] == cumulative_period_type - and delta <= datetime.datetime.strptime(item["date"], "%Y-%m-%d") - ] - - # Sort by date - sorted_data = sorted(filtered_data, key=lambda x: x["date"], reverse=False) - - # Return json - return Response(json.dumps(sorted_data), mimetype="application/json") - - -@app.route("/get_json/", methods=["GET"]) -def get_json(filename): - - period = request.args.get("period") - today = datetime.datetime.today() - - file_path = os.path.join(FILES_DIRECTORY, filename) - if not os.path.isfile(file_path): - abort(404) - - with open(file_path, "r") as f: - data = orjson.loads(f.read()) - - if period == "last 7 days": - delta = today - datetime.timedelta(days=7) - filtered_data = [ - item - for item in data - if delta <= datetime.datetime.strptime(item["date"], "%Y-%m-%d") <= today - ] - sorted_data = sorted(filtered_data, key=lambda x: x["date"]) - elif period == "last 28 days": - delta = today - datetime.timedelta(days=28) - filtered_data = [ - item - for item in data - if delta <= datetime.datetime.strptime(item["date"], "%Y-%m-%d") <= today - ] - sorted_data = sorted(filtered_data, key=lambda x: x["date"]) - elif period == "last 365 days": - delta = today - datetime.timedelta(days=365) - filtered_data = [ - item - for item in data - if delta <= datetime.datetime.strptime(item["date"], "%Y-%m-%d") <= today - ] - sorted_data = sorted(filtered_data, key=lambda x: x["date"]) - elif period == "last 2 years": - delta = today - datetime.timedelta(days=730) - filtered_data = [ - item - for item in data - if delta <= datetime.datetime.strptime(item["date"], "%Y-%m-%d") <= today - ] - sorted_data = sorted(filtered_data, key=lambda x: x["date"]) - else: - sorted_data = sorted(data, key=lambda x: x["date"]) - - return jsonify(sorted_data) - - -@app.route("/mangrove_data/", methods=["GET"]) -def mangrove_data(method): - - with open("../data/dev/final__wdpa_pid_mangrove_diff_stats.json", "rb") as f: - data = orjson.loads(f.read()) - - if method == "countries": - df = pd.read_json(json.dumps(data)) - countries = df[["year", "country", "n_pixels", "diff", "cumulative_diff"]] - countriesAgg = countries.groupby(["year", "country"]).agg( - {"n_pixels": "sum", "diff": "sum", "cumulative_diff": "sum"} - ) - countriesAgg["year0_pixels"] = ( - countriesAgg["n_pixels"] - countriesAgg["cumulative_diff"] - ) - countriesAgg["pct_diff"] = ( - 100 - * (countriesAgg["n_pixels"] - countriesAgg["year0_pixels"]) - / countriesAgg["year0_pixels"] - ).round(2) - countriesLatest = countriesAgg.loc[[2020]].reset_index().set_index("country") - return Response( - countriesLatest.to_json(orient="index"), mimetype="application/json" - ) - - -@app.route("/download/", methods=["GET"]) -def download_file(filename): - try: - return send_from_directory(FILES_DIRECTORY, filename, as_attachment=True) - except FileNotFoundError: - abort(404) - - -@app.route("/cog", methods=["GET"]) -def serve_cog(): - year = request.args.get("year") - pid = request.args.get("pid") # change this line - dir = f"{FILES_DIRECTORY}/cog/{year}/" - try: - return send_from_directory(dir, f"{pid}.tif", as_attachment=True) - except FileNotFoundError: - abort(404) - - -if __name__ == "__main__": - app.run() diff --git a/backend/config/database.py b/backend/config/database.py new file mode 100644 index 0000000..a7bbee3 --- /dev/null +++ b/backend/config/database.py @@ -0,0 +1,5 @@ +from pymongo.mongo_client import MongoClient + +client = MongoClient( + host=["10.0.10.35:27017"], username="admin", password="1234", authSource="admin" +) diff --git a/backend/main.py b/backend/main.py new file mode 100644 index 0000000..23a4e59 --- /dev/null +++ b/backend/main.py @@ -0,0 +1,15 @@ +from fastapi import FastAPI +from pymongo.mongo_client import MongoClient +from routes.route import router + +app = FastAPI() +client = MongoClient( + host=["10.0.10.35:27017"], username="admin", password="1234", authSource="admin" +) +try: + client.admin.command("ping") + print("Successfully pinged MongoDB deployment!") +except Exception as e: + print(e) + +app.include_router(router) diff --git a/backend/models/mongodb_handler.py b/backend/models/mongodb_handler.py new file mode 100644 index 0000000..7b22a8a --- /dev/null +++ b/backend/models/mongodb_handler.py @@ -0,0 +1,12 @@ +class MongoDBHandler: + def __init__(self, collection): + self.collection = collection + + def find_limit(self, limit: int): + return self.collection.find().limit(limit) + + def find_one(self, query): + return self.collection.find_one(query) + + def aggregate(self, query): + return self.collection.aggregate(query) diff --git a/backend/models/pipelines.py b/backend/models/pipelines.py new file mode 100644 index 0000000..d42fd78 --- /dev/null +++ b/backend/models/pipelines.py @@ -0,0 +1,5 @@ +aggregate_mangrove_by_country = [ + { + "$match": {"year": "2020"}, + }, +] diff --git a/backend/routes/route.py b/backend/routes/route.py new file mode 100644 index 0000000..309dcbc --- /dev/null +++ b/backend/routes/route.py @@ -0,0 +1,21 @@ +from fastapi import APIRouter +from config.database import client +from models.mongodb_handler import MongoDBHandler +import models.pipelines as pipelines +import schema.schemas as schemas +from schema.schemas import DataSerializer + +router = APIRouter() + + +@router.get("/mangrove_country_data") +async def mangrove_country_data(): + db = client.baseddata + collection_name = db["final__protected_mangroves_summary_stats_by_country_agg"] + schema = schemas.mangrove_country_schema + query = pipelines.aggregate_mangrove_by_country + serializer = DataSerializer(schema) + handler = MongoDBHandler(collection_name) + rawData = handler.aggregate(query) + serializedData = serializer.serialize_many(rawData) + return serializedData diff --git a/backend/schema/schemas.py b/backend/schema/schemas.py new file mode 100644 index 0000000..04b9b2e --- /dev/null +++ b/backend/schema/schemas.py @@ -0,0 +1,19 @@ +def mangrove_country_schema(data): + return { + "country_with_parent": str(data["country_with_parent"]), + "original_pixels": int(data["original_pixels"]), + "total_n_pixels": int(data["total_n_pixels"]), + "cumulative_pixels_diff": int(data["cumulative_pixels_diff"]), + "cumulative_pct_diff": float(data["cumulative_pct_diff"]), + } + + +class DataSerializer: + def __init__(self, schema_func): + self.schema_func = schema_func + + def serialize_one(self, data) -> dict: + return self.schema_func(data) + + def serialize_many(self, data_list) -> list: + return [self.serialize_one(data) for data in data_list] diff --git a/content/data-lab/protected-mangroves.md b/content/data-lab/protected-mangroves.md index 5bd36b8..bb11027 100644 --- a/content/data-lab/protected-mangroves.md +++ b/content/data-lab/protected-mangroves.md @@ -9,5 +9,5 @@ tags: ["Bitcoin", "Stats"] script: "/js/mangrove-map.js" --- -{{< table id="mangrove_countries" endpoint="mangrove_data/countries" tableKey="country" headers="{'country': 'Country', 'cumulative_diff': 'Diff', 'pct_diff': '% Diff'}" maxHeight="400px" sortable="true">}} +{{< table id="mangrove_countries" endpoint="mangrove_country_data" headers="{'country_with_parent': 'Country', 'original_pixels': '1996 Cover', 'total_n_pixels': '2020 Cover', 'cumulative_pixels_diff': 'Diff', 'cumulative_pct_diff': '% Diff'}" maxHeight="400px" sortable="true">}} {{< map id="map" style="https://tiles.semitamaps.com/styles/maptiler-basic/style.json">}} diff --git a/hugo.toml b/hugo.toml index cc8d204..fe6de01 100644 --- a/hugo.toml +++ b/hugo.toml @@ -3,7 +3,7 @@ languageCode = 'en-gb' title = 'Based Data' [params] - apiURL = 'http://localhost:5000' + apiURL = 'http://localhost:8000' [markup.highlight] pygmentsUseClasses = false diff --git a/layouts/shortcodes/table.html b/layouts/shortcodes/table.html index f1b401a..9cddf0f 100644 --- a/layouts/shortcodes/table.html +++ b/layouts/shortcodes/table.html @@ -42,11 +42,7 @@ const td = document.createElement("td"); const div = document.createElement("div"); div.id = "scrollable"; - if (columnName == "{{ .Get `tableKey` }}") { - div.textContent = key; - } else { - div.textContent = data[key][columnName]; - }; + div.textContent = data[key][columnName]; td.appendChild(div); row.appendChild(td); tbody.appendChild(row); diff --git a/shell.nix b/shell.nix index b061898..9252e91 100644 --- a/shell.nix +++ b/shell.nix @@ -1,5 +1,4 @@ -{ pkgs ? import { } }: - +{pkgs ? import {}}: pkgs.mkShell { nativeBuildInputs = with pkgs; [ @@ -10,9 +9,13 @@ pkgs.mkShell python312Packages.orjson hugo tmux + mongodb-tools + python312Packages.uvicorn + python312Packages.fastapi + python312Packages.pymongo ]; - shellHook = '' + shellHook = '' ${pkgs.cowsay}/bin/cowsay "Welcome to the baseddata.io development environment!" | ${pkgs.lolcat}/bin/lolcat get_session=$(tmux list-session | grep "baseddata") @@ -22,9 +25,8 @@ pkgs.mkShell tmux new-session -d -s baseddata tmux split-window -h tmux send-keys -t 0 "hugo server" C-m - tmux send-keys -t 1 "cd backend && python app.py" C-m + tmux send-keys -t 1 "python backend/main.py" C-m echo "Baseddata running in dev tmux shell" fi - ''; + ''; } -