New MongoDB backend

This commit is contained in:
Sam 2024-09-20 17:49:31 +01:00
parent 8e7d963e26
commit 2d56c61856
11 changed files with 88 additions and 223 deletions

View File

@ -1,210 +0,0 @@
from flask import Flask, g, jsonify, request, json, Response, send_from_directory, abort
from flask_cors import CORS
import orjson, os
import pandas as pd
import datetime
import time
# Flask application serving pre-built JSON data files over a small REST API.
app = Flask(__name__)
# Allow cross-origin requests from the static frontend.
CORS(app)
# Root directory of the served data files, relative to the backend's cwd.
FILES_DIRECTORY = "../data/"
@app.before_request
def start_timer():
    # Stamp the request arrival time (epoch seconds) on flask.g; the
    # after_request logger reads it to compute the per-request duration.
    g.start = time.time()
@app.after_request
def log(response):
    """Append a one-line, comma-separated access-log entry to api_logs.txt.

    Reads the request start time stamped on ``g`` by ``start_timer`` and
    always returns the response unmodified.
    """
    finished = time.time()
    elapsed = round(finished - g.start, 4)
    stamp = datetime.datetime.fromtimestamp(finished).strftime("%Y-%m-%d %H:%M:%S")
    fields = (
        ("timestamp", stamp),
        ("duration", elapsed),
        ("method", request.method),
        ("url", request.url),
        ("status", response.status_code),
        # Last hop of the forwarded-for chain.
        ("remote_addr", request.access_route[-1]),
        ("user_agent", request.user_agent.string),
    )
    line = ",".join(f"{name}={value}" for name, value in fields)
    with open("api_logs.txt", "a") as f:
        f.write(line + "\n")
    return response
@app.route("/bitcoin_business_growth_by_country", methods=["GET"])
def business_growth():
    """Return bitcoin business-growth rows filtered by query parameters.

    Query parameters:
        latest_date: "true"/other — keep only rows whose latest_date flag matches.
        countries: comma-separated country names to keep.
        cumulative_period_type: one of "1 day", "7 day", "28 day", "365 day";
            also restricts rows to the matching recent date window.

    Returns a JSON array sorted ascending by date.
    """
    # Window length (in days) per period type; the extra day matches the
    # original branch-per-period behaviour (e.g. "1 day" looked back 2 days).
    period_windows = {"1 day": 2, "7 day": 8, "28 day": 29, "365 day": 366}
    today = datetime.datetime.today()
    # Parse args from request
    latest_date = request.args.get("latest_date")
    country_names = request.args.get("countries")
    cumulative_period_type = request.args.get("cumulative_period_type")
    # Open json locally
    with open("../data/final__bitcoin_business_growth_by_country.json", "rb") as f:
        data = orjson.loads(f.read())
    filtered_data = data
    if latest_date:
        latest_date_bool = latest_date == "true"
        filtered_data = [
            item for item in filtered_data if item["latest_date"] == latest_date_bool
        ]
    if country_names:
        countries = {name.strip() for name in country_names.split(",")}
        filtered_data = [
            item for item in filtered_data if item["country_name"] in countries
        ]
    # Single filter replaces four copy-pasted per-period branches.
    days = period_windows.get(cumulative_period_type)
    if days is not None:
        delta = today - datetime.timedelta(days=days)
        filtered_data = [
            item
            for item in filtered_data
            if item["cumulative_period_type"] == cumulative_period_type
            and delta <= datetime.datetime.strptime(item["date"], "%Y-%m-%d")
        ]
    # Sort by date ascending, then return as JSON.
    sorted_data = sorted(filtered_data, key=lambda x: x["date"], reverse=False)
    return Response(json.dumps(sorted_data), mimetype="application/json")
@app.route("/get_json/<filename>", methods=["GET"])
def get_json(filename):
    """Serve a JSON data file, optionally filtered to a recent date window.

    Query parameters:
        period: one of "last 7 days", "last 28 days", "last 365 days",
            "last 2 years"; any other value returns the full file.

    Returns the rows sorted ascending by their "date" field; 404 if the
    file does not exist under FILES_DIRECTORY.
    """
    # Window length (in days) per supported period keyword.
    period_days = {
        "last 7 days": 7,
        "last 28 days": 28,
        "last 365 days": 365,
        "last 2 years": 730,
    }
    period = request.args.get("period")
    today = datetime.datetime.today()
    file_path = os.path.join(FILES_DIRECTORY, filename)
    if not os.path.isfile(file_path):
        abort(404)
    with open(file_path, "r") as f:
        data = orjson.loads(f.read())
    # Single windowed filter replaces four copy-pasted per-period branches.
    days = period_days.get(period)
    if days is not None:
        delta = today - datetime.timedelta(days=days)
        data = [
            item
            for item in data
            if delta <= datetime.datetime.strptime(item["date"], "%Y-%m-%d") <= today
        ]
    sorted_data = sorted(data, key=lambda x: x["date"])
    return jsonify(sorted_data)
@app.route("/mangrove_data/<method>", methods=["GET"])
def mangrove_data(method):
    """Aggregate mangrove diff statistics from the WDPA stats file.

    Only method == "countries" is supported: returns 2020 per-country totals
    with percent change relative to the reconstructed baseline pixel count,
    keyed by country name.
    """
    if method != "countries":
        # Fix: an unknown method previously fell through and returned None,
        # which Flask turns into a 500; a 404 is the correct response.
        abort(404)
    with open("../data/dev/final__wdpa_pid_mangrove_diff_stats.json", "rb") as f:
        data = orjson.loads(f.read())
    df = pd.read_json(json.dumps(data))
    countries = df[["year", "country", "n_pixels", "diff", "cumulative_diff"]]
    countriesAgg = countries.groupby(["year", "country"]).agg(
        {"n_pixels": "sum", "diff": "sum", "cumulative_diff": "sum"}
    )
    # Reconstruct the baseline (year-0) pixel count from the cumulative diff,
    # then express the change as a percentage of that baseline.
    countriesAgg["year0_pixels"] = (
        countriesAgg["n_pixels"] - countriesAgg["cumulative_diff"]
    )
    countriesAgg["pct_diff"] = (
        100
        * (countriesAgg["n_pixels"] - countriesAgg["year0_pixels"])
        / countriesAgg["year0_pixels"]
    ).round(2)
    # Keep only the latest available year (2020) and re-key by country.
    countriesLatest = countriesAgg.loc[[2020]].reset_index().set_index("country")
    return Response(
        countriesLatest.to_json(orient="index"), mimetype="application/json"
    )
@app.route("/download/<filename>", methods=["GET"])
def download_file(filename):
    # Serve a file from FILES_DIRECTORY as an attachment; send_from_directory
    # sanitises the filename component, so traversal out of the directory is
    # rejected. Missing files become a 404 rather than a 500.
    try:
        return send_from_directory(FILES_DIRECTORY, filename, as_attachment=True)
    except FileNotFoundError:
        abort(404)
@app.route("/cog", methods=["GET"])
def serve_cog():
    """Serve the cloud-optimised GeoTIFF for a given year and protected-area id.

    Query parameters:
        year: four-digit year selecting the COG subdirectory.
        pid: protected-area id; resolves to "<pid>.tif" in that directory.

    Returns 404 for a non-numeric year or a missing file.
    """
    year = request.args.get("year", "")
    pid = request.args.get("pid", "")
    # Security fix: `year` was interpolated straight into the *directory* path.
    # send_from_directory only sanitises the filename component, so a value
    # like "../../" allowed reading .tif files outside the data tree.
    if not year.isdigit():
        abort(404)
    cog_dir = f"{FILES_DIRECTORY}/cog/{year}/"  # renamed: `dir` shadowed a builtin
    try:
        return send_from_directory(cog_dir, f"{pid}.tif", as_attachment=True)
    except FileNotFoundError:
        abort(404)
# Run the Flask development server when executed directly.
if __name__ == "__main__":
    app.run()

View File

@ -0,0 +1,5 @@
from pymongo.mongo_client import MongoClient
# Shared MongoDB client used by the API routes (imported as config.database.client).
# NOTE(review): host and credentials are hard-coded in source — move them to
# environment variables or a config file before deploying.
client = MongoClient(
    host=["10.0.10.35:27017"], username="admin", password="1234", authSource="admin"
)

15
backend/main.py Normal file
View File

@ -0,0 +1,15 @@
from fastapi import FastAPI
from pymongo.mongo_client import MongoClient
from routes.route import router
# FastAPI application entry point: creates the app and mounts the API routes.
app = FastAPI()
# NOTE(review): this client is separate from config.database.client (which the
# routes use) and only performs the startup ping; credentials are hard-coded —
# move to environment variables or a config file.
client = MongoClient(
    host=["10.0.10.35:27017"], username="admin", password="1234", authSource="admin"
)
# Report database reachability at startup without aborting the app.
try:
    client.admin.command("ping")
    print("Successfully pinged MongoDB deployment!")
except Exception as e:
    print(e)
app.include_router(router)

View File

@ -0,0 +1,12 @@
class MongoDBHandler:
    """Thin wrapper around a pymongo collection exposing the query helpers
    the API routes need."""

    def __init__(self, collection):
        # Underlying pymongo collection (any object with the same API works).
        self.collection = collection

    def find_limit(self, limit: int):
        """Return a cursor over at most ``limit`` documents."""
        cursor = self.collection.find()
        return cursor.limit(limit)

    def find_one(self, query):
        """Return the first document matching ``query``."""
        return self.collection.find_one(query)

    def aggregate(self, query):
        """Run the aggregation pipeline ``query`` and return its cursor."""
        return self.collection.aggregate(query)

View File

@ -0,0 +1,5 @@
# Aggregation pipeline: keep only documents for the year 2020.
# NOTE(review): "year" is matched as a *string* — confirm the collection
# stores it that way rather than as an int, or the $match returns nothing.
aggregate_mangrove_by_country = [
    {
        "$match": {"year": "2020"},
    },
]

21
backend/routes/route.py Normal file
View File

@ -0,0 +1,21 @@
from fastapi import APIRouter
from config.database import client
from models.mongodb_handler import MongoDBHandler
import models.pipelines as pipelines
import schema.schemas as schemas
from schema.schemas import DataSerializer
router = APIRouter()


@router.get("/mangrove_country_data")
async def mangrove_country_data():
    """Return per-country mangrove summary stats (2020), serialized with the
    country schema."""
    database = client.baseddata
    collection = database["final__protected_mangroves_summary_stats_by_country_agg"]
    handler = MongoDBHandler(collection)
    # Run the fixed aggregation pipeline, then serialize each raw document.
    raw_data = handler.aggregate(pipelines.aggregate_mangrove_by_country)
    serializer = DataSerializer(schemas.mangrove_country_schema)
    return serializer.serialize_many(raw_data)

19
backend/schema/schemas.py Normal file
View File

@ -0,0 +1,19 @@
def mangrove_country_schema(data):
    """Project a raw country-summary document onto the fields the frontend
    table expects, coercing each value to its display type."""
    casts = {
        "country_with_parent": str,
        "original_pixels": int,
        "total_n_pixels": int,
        "cumulative_pixels_diff": int,
        "cumulative_pct_diff": float,
    }
    # dicts preserve insertion order, so the output field order is unchanged.
    return {field: cast(data[field]) for field, cast in casts.items()}
class DataSerializer:
    """Applies a schema function to raw MongoDB documents, one at a time or
    over a whole cursor/iterable."""

    def __init__(self, schema_func):
        # Callable mapping one raw document to its serialized dict form.
        self.schema_func = schema_func

    def serialize_one(self, data) -> dict:
        """Serialize a single document via the configured schema function."""
        return self.schema_func(data)

    def serialize_many(self, data_list) -> list:
        """Serialize every document in ``data_list``, preserving order."""
        return list(map(self.serialize_one, data_list))

View File

@ -9,5 +9,5 @@ tags: ["Bitcoin", "Stats"]
script: "/js/mangrove-map.js"
---
{{< table id="mangrove_countries" endpoint="mangrove_data/countries" tableKey="country" headers="{'country': 'Country', 'cumulative_diff': 'Diff', 'pct_diff': '% Diff'}" maxHeight="400px" sortable="true">}}
{{< table id="mangrove_countries" endpoint="mangrove_country_data" headers="{'country_with_parent': 'Country', 'original_pixels': '1996 Cover', 'total_n_pixels': '2020 Cover', 'cumulative_pixels_diff': 'Diff', 'cumulative_pct_diff': '% Diff'}" maxHeight="400px" sortable="true">}}
{{< map id="map" style="https://tiles.semitamaps.com/styles/maptiler-basic/style.json">}}

View File

@ -3,7 +3,7 @@ languageCode = 'en-gb'
title = 'Based Data'
[params]
apiURL = 'http://localhost:5000'
apiURL = 'http://localhost:8000'
[markup.highlight]
pygmentsUseClasses = false

View File

@ -42,11 +42,7 @@
const td = document.createElement("td");
const div = document.createElement("div");
div.id = "scrollable";
if (columnName == "{{ .Get `tableKey` }}") {
div.textContent = key;
} else {
div.textContent = data[key][columnName];
};
div.textContent = data[key][columnName];
td.appendChild(div);
row.appendChild(td);
tbody.appendChild(row);

View File

@ -1,5 +1,4 @@
{ pkgs ? import <nixpkgs> { } }:
{pkgs ? import <nixpkgs> {}}:
pkgs.mkShell
{
nativeBuildInputs = with pkgs; [
@ -10,9 +9,13 @@ pkgs.mkShell
python312Packages.orjson
hugo
tmux
mongodb-tools
python312Packages.uvicorn
python312Packages.fastapi
python312Packages.pymongo
];
shellHook = ''
shellHook = ''
${pkgs.cowsay}/bin/cowsay "Welcome to the baseddata.io development environment!" | ${pkgs.lolcat}/bin/lolcat
get_session=$(tmux list-session | grep "baseddata")
@ -22,9 +25,8 @@ pkgs.mkShell
tmux new-session -d -s baseddata
tmux split-window -h
tmux send-keys -t 0 "hugo server" C-m
tmux send-keys -t 1 "cd backend && python app.py" C-m
tmux send-keys -t 1 "python backend/main.py" C-m
echo "Baseddata running in dev tmux shell"
fi
'';
'';
}