Compare commits

..

No commits in common. "db9fc35715eafa2977a0bc1ac0f78c28b092f30a" and "537cfdee1b4f722a3db7c2c5d20625d23f5f8313" have entirely different histories.

18 changed files with 246 additions and 419 deletions

2
.gitignore vendored
View File

@ -5,5 +5,3 @@
/data
backend/api_logs.txt
*__pycache__*
.env
poetry.lock

View File

@ -1,215 +0,0 @@
def parse_int(args):
    """Convert *args* to an ``int``, printing a message when it cannot be cast.

    Args:
        args: The value to convert, e.g. a string or number taken from a
            decoded request payload.

    Returns:
        The result of ``int(args)``.

    Raises:
        ValueError: If ``args`` is a string that is not a valid integer.
        TypeError: If ``args`` has a type ``int()`` does not accept
            (for example ``None`` or a list).
    """
    try:
        return int(args)
    except (TypeError, ValueError):
        # int() reports bad values with ValueError but bad types (None,
        # list, dict) with TypeError; report both before re-raising so the
        # failure is never silent in the logs.
        print(f"{args} cannot be cast to int")
        raise
def mangrove_by_country_latest():
    """Return the SQL text selecting the 2020 mangrove summary rows.

    The statement reads every column from
    ``models_final.final__protected_mangroves_summary_stats_by_country_agg``
    for ``year = '2020'``, ordered by ``cumulative_pixels_diff`` descending.
    """
    statement = (
        "\n"
        "select * from models_final.final__protected_mangroves_summary_stats_by_country_agg\n"
        "where year = '2020'\n"
        "order by cumulative_pixels_diff desc\n"
    )
    return statement
def _quote_sql_literal(value):
    """Return *value* rendered as a single-quoted SQL string literal.

    Embedded single quotes are doubled so the value cannot terminate the
    literal early. Backslashes and NUL bytes are rejected outright because
    their handling inside a quoted literal depends on server settings.

    Raises:
        ValueError: If the text contains a backslash or a NUL byte.
    """
    text = value if isinstance(value, str) else str(value)
    if "\\" in text or "\x00" in text:
        raise ValueError(f"{text!r} contains characters not allowed in a SQL literal")
    return "'" + text.replace("'", "''") + "'"


def bitcoin_business_growth_timeseries(args):
    """Build the SQL for one country's business-growth time series.

    Args:
        args: Mapping with the keys ``"days_ago"`` (converted with
            ``parse_int``) and ``"country_name"``.

    Returns:
        SQL text selecting every column of
        ``models_final.final__bitcoin_business_growth_by_country`` where
        ``days_ago`` is at most the requested value and ``country_name``
        matches, ordered by ``date``.

    Raises:
        KeyError: If a required key is missing from ``args``.
        ValueError: If ``days_ago`` is not an integer or ``country_name``
            contains a backslash or NUL byte.
    """
    days_ago = parse_int(args["days_ago"])
    # The value originates from the request, so it is escaped before being
    # placed in the statement. NOTE(review): the query is returned as plain
    # text here; switching to bound parameters would be preferable if the
    # executing side is extended to accept them.
    country_name = _quote_sql_literal(args["country_name"])
    return f"""
select * from models_final.final__bitcoin_business_growth_by_country
where days_ago <= {days_ago} and country_name = {country_name}
order by date
"""
def bitcoin_business_growth_percent_diff_days_ago(args):
    """Build the SQL comparing each country's cumulative value across a window.

    Args:
        args: Mapping with the key ``"days_ago"``; the value is converted
            with ``parse_int`` and bounds the window (``days_ago <= N``).

    Returns:
        SQL text made of three CTEs over
        ``models_final.final__bitcoin_business_growth_by_country``:

        * ``filtered_data`` keeps rows whose ``days_ago`` is within the window.
        * ``first_and_last_values`` attaches, per country, the value and date
          at the largest ``days_ago`` (``first_value`` / ``first_date``) and
          at the smallest ``days_ago`` (``last_value`` / ``last_date``).
        * ``diff`` adds ``difference`` (``last_value - first_value``) and
          ``percent_difference`` (rounded to 2 decimals).

        The final select keeps the ``days_ago = 1`` row of each country,
        ordered by ``difference`` descending.

    Raises:
        KeyError: If ``"days_ago"`` is missing from ``args``.
        ValueError: If ``days_ago`` cannot be converted to an integer.
    """
    # Only an int is interpolated into the statement below.
    days_ago = parse_int(args["days_ago"])
    # NOTE(review): safe_divide is not defined in this file - presumably a
    # database-side function; confirm it exists in the target database.
    return f"""
with
filtered_data as (
select country_name, date, days_ago, cumulative_value
from models_final.final__bitcoin_business_growth_by_country
where days_ago <= {days_ago}
order by country_name, days_ago desc
),
first_and_last_values as (
select
country_name,
date,
days_ago,
cumulative_value,
first_value(cumulative_value) over (
partition by country_name order by days_ago desc
) as first_value,
first_value(date) over (
partition by country_name order by days_ago desc
) as first_date,
first_value(cumulative_value) over (
partition by country_name order by days_ago
) as last_value,
first_value(date) over (
partition by country_name order by days_ago
) as last_date
from filtered_data
),
diff as (
select
country_name,
date,
first_date,
last_date,
days_ago,
cumulative_value,
first_value,
last_value,
last_value - first_value as difference,
round(
100 * safe_divide((last_value - first_value), first_value), 2
) as percent_difference
from first_and_last_values
)
select *
from diff
where days_ago = 1
order by difference desc
"""
# def bitcoin_business_growth_timeseries(query):
# pipeline = [
# {
# "$match": {
# "days_ago": {"$lte": int(query["days_ago"])},
# "country_name": query["country_name"],
# }
# },
# {
# "$project": {
# "country_name": "$country_name",
# "date": "$date",
# "cumulative_value": "$cumulative_value",
# }
# },
# {"$sort": {"country_name": 1, "days_ago": 1}},
# ]
# return pipeline
# def mangrove_by_country_latest():
# pipeline = [
# {
# "$match": {"year": "2020"},
# },
# ]
# return pipeline
#
#
# def mangrove_by_country_agg(query):
# pipeline = [
# {"$match": {"country_with_parent": query["country_with_parent"]}},
# {
# "$group": {
# "_id": {"country_with_parent": "$country_with_parent", "year": "$year"},
# "total_pixels": {"$sum": "$total_n_pixels"},
# }
# },
# {
# "$project": {
# "_id": 0,
# "country_with_parent": "$_id.country_with_parent",
# "year": "$_id.year",
# "total_pixels": 1,
# }
# },
# {"$sort": {"year": 1}},
# ]
# return pipeline
#
#
# def bitcoin_business_growth_timeseries(query):
# pipeline = [
# {
# "$match": {
# "days_ago": {"$lte": int(query["days_ago"])},
# "country_name": query["country_name"],
# }
# },
# {
# "$project": {
# "country_name": "$country_name",
# "date": "$date",
# "cumulative_value": "$cumulative_value",
# }
# },
# {"$sort": {"country_name": 1, "days_ago": 1}},
# ]
# return pipeline
#
#
# NOTE: legacy MongoDB version of the percent-diff pipeline, kept for
# reference only. The whole definition is commented out (header and body
# together) so the module has no stray top-level `return`, and the
# `$toDouble` operator casing is restored because MongoDB operator names
# are case-sensitive.
#
# def bitcoin_business_growth_percent_diff_days_ago(query):
#     pipeline = [
#         {"$match": {"days_ago": {"$lte": int(query["days_ago"])}}},
#         {"$sort": {"country_name": 1, "days_ago": 1}},
#         {
#             "$group": {
#                 "_id": "$country_name",
#                 "firstvalue": {"$first": "$cumulative_value"},
#                 "lastvalue": {"$last": "$cumulative_value"},
#                 "firstdate": {"$min": "$date"},
#                 "lastdate": {"$max": "$date"},
#             }
#         },
#         {
#             "$project": {
#                 "country_name": "$_id",
#                 "first_value": "$firstvalue",
#                 "last_value": "$lastvalue",
#                 "difference": {
#                     "$subtract": [
#                         {"$toDouble": "$firstvalue"},
#                         {"$toDouble": "$lastvalue"},
#                     ]
#                 },
#                 "first_date": "$firstdate",
#                 "last_date": "$lastdate",
#                 "percent_difference": {
#                     "$cond": {
#                         "if": {"$eq": [{"$toDouble": "$lastvalue"}, 0]},
#                         "then": {
#                             "$cond": {
#                                 "if": {"$gt": [{"$toDouble": "$firstvalue"}, 0]},
#                                 "then": "new",
#                                 "else": "none",
#                             }
#                         },
#                         "else": {
#                             "$round": [
#                                 {
#                                     "$multiply": [
#                                         {
#                                             "$divide": [
#                                                 {
#                                                     "$subtract": [
#                                                         {"$toDouble": "$firstvalue"},
#                                                         {"$toDouble": "$lastvalue"},
#                                                     ]
#                                                 },
#                                                 {"$toDouble": "$lastvalue"},
#                                             ]
#                                         },
#                                         100,
#                                     ]
#                                 }
#                             ]
#                         },
#                     }
#                 },
#             }
#         },
#     ]
#     return pipeline
#
#
# def bitcoin_business_growth_latest(query):
# pipeline = [
# {
# "$match": query["filter"],
# },
# {"$sort": {"date": 1}},
# ]
# return pipeline

View File

@ -1,34 +0,0 @@
from psycopg2.extras import RealDictCursor
import psycopg2, os
class PostgresHandler:
    """Single-use helper that opens a PostgreSQL connection and runs one query.

    Connection settings are read from the ``PGDATABASE``, ``PGHOST``,
    ``PGUSER``, ``PGPASSWORD`` and ``PGPORT`` environment variables.
    ``execute_query`` closes the connection when it finishes, so create a
    new handler for every query.
    """

    def __init__(self):
        """Open the connection and create the cursor used by ``execute_query``."""
        self.connection = self.connect_to_pg()
        self.cur = self.connection.cursor(cursor_factory=RealDictCursor)

    def connect_to_pg(self):
        """Connect using the ``PG*`` environment variables.

        Returns:
            The open psycopg2 connection.

        Raises:
            Exception: If the connection attempt fails; the underlying
                error is chained as ``__cause__``.
        """
        try:
            connection = psycopg2.connect(
                dbname=os.getenv('PGDATABASE'),
                host=os.getenv('PGHOST'),
                user=os.getenv('PGUSER'),
                password=os.getenv('PGPASSWORD'),
                port=os.getenv('PGPORT'),
            )
        except Exception as e:
            message = f"Connection to postgres database failed: {e}"
            # Chain the cause so the driver's traceback is not lost.
            raise Exception(message) from e
        print("Successfully connected to DB")
        return connection

    def execute_query(self, query):
        """Run ``query``, return all rows, and release the connection.

        Args:
            query: SQL text passed to the cursor's ``execute``.

        Returns:
            Whatever the cursor's ``fetchall()`` returns for the query.

        Raises:
            Exception: Any error raised while executing, fetching or
                committing is re-raised after ``"Error executing query"``
                is printed.
        """
        try:
            self.cur.execute(query)
            results = self.cur.fetchall()
            self.connection.commit()
            return results
        except Exception:
            print("Error executing query")
            raise
        finally:
            # Release the cursor and connection on success AND on failure;
            # previously a failing query left both open.
            try:
                self.cur.close()
            finally:
                self.connection.close()

View File

@ -1,96 +0,0 @@
from fastapi import APIRouter
from api.postgres_handler import PostgresHandler
import api.pipelines as pipelines
import api.schemas as schemas
from api.schemas import DataSerializer
import json
router = APIRouter()
def parse_args_to_dict(query):
    """Decode a JSON object from *query*, always returning a ``dict``.

    Args:
        query: JSON text expected to encode an object.

    Returns:
        The decoded object, or ``{"error": "Invalid JSON: ..."}`` when the
        text is not valid JSON or decodes to something other than an object.
    """
    try:
        parsed = json.loads(query)
    except json.JSONDecodeError as e:
        return {"error": f"Invalid JSON: {e}"}
    if not isinstance(parsed, dict):
        # Valid JSON such as "[1, 2]" or "3" is not a usable argument
        # mapping; report it the same way as malformed text instead of
        # letting a list/int reach the query builders.
        return {
            "error": f"Invalid JSON: expected an object, got {type(parsed).__name__}"
        }
    return parsed
@router.get("/mangrove_by_country_latest")
async def mangrove_by_country_latest():
    """Run the ``mangrove_by_country_latest`` query and return its serialized rows.

    Returns:
        The list produced by ``DataSerializer.serialize_many`` using
        ``schemas.mangrove_by_country_latest_schema``.
    """
    sql = pipelines.mangrove_by_country_latest()
    db_handler = PostgresHandler()
    row_serializer = DataSerializer(schemas.mangrove_by_country_latest_schema)
    rows = db_handler.execute_query(sql)
    return row_serializer.serialize_many(rows)
@router.get("/bitcoin_business_growth_timeseries")
async def bitcoin_business_growth_timeseries(query: str):
    """Serve the business-growth time series selected by a JSON ``query``.

    Args:
        query: JSON object text; it is decoded with ``parse_args_to_dict``
            and handed to ``pipelines.bitcoin_business_growth_timeseries``.

    Returns:
        The database rows serialized with
        ``schemas.bitcoin_business_growth_timeseries_schema``.

    Raises:
        HTTPException: Status 400 when ``query`` is not a usable JSON object
            or its parameters are missing or invalid.
    """
    # Imported here so the handler is self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    args = parse_args_to_dict(query)
    # parse_args_to_dict signals bad JSON by returning {"error": ...};
    # turn that into a client error instead of a KeyError further down.
    if not isinstance(args, dict) or "error" in args:
        detail = (
            args["error"]
            if isinstance(args, dict)
            else "Invalid JSON: expected an object"
        )
        raise HTTPException(status_code=400, detail=detail)
    try:
        pipeline = pipelines.bitcoin_business_growth_timeseries(args)
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Invalid query parameters: {exc!r}"
        ) from exc
    handler = PostgresHandler()
    serializer = DataSerializer(schemas.bitcoin_business_growth_timeseries_schema)
    raw_data = handler.execute_query(pipeline)
    return serializer.serialize_many(raw_data)
@router.get("/bitcoin_business_growth_percent_diff")
async def bitcoin_business_growth_percent_diff(query: str):
    """Serve the per-country growth comparison selected by a JSON ``query``.

    Args:
        query: JSON object text; it is decoded with ``parse_args_to_dict``
            and handed to
            ``pipelines.bitcoin_business_growth_percent_diff_days_ago``.

    Returns:
        The database rows serialized with
        ``schemas.bitcoin_business_growth_percent_diff_schema``.

    Raises:
        HTTPException: Status 400 when ``query`` is not a usable JSON object
            or its parameters are missing or invalid.
    """
    # Imported here so the handler is self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    args = parse_args_to_dict(query)
    # parse_args_to_dict signals bad JSON by returning {"error": ...};
    # turn that into a client error instead of a KeyError further down.
    if not isinstance(args, dict) or "error" in args:
        detail = (
            args["error"]
            if isinstance(args, dict)
            else "Invalid JSON: expected an object"
        )
        raise HTTPException(status_code=400, detail=detail)
    try:
        pipeline = pipelines.bitcoin_business_growth_percent_diff_days_ago(args)
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Invalid query parameters: {exc!r}"
        ) from exc
    handler = PostgresHandler()
    serializer = DataSerializer(schemas.bitcoin_business_growth_percent_diff_schema)
    raw_data = handler.execute_query(pipeline)
    return serializer.serialize_many(raw_data)
# @router.get("/bitcoin_business_growth_percent_diff")
# async def bitcoin_business_growth_percent_diff(query: str):
# query = ast.literal_eval(query)
#
# query = queries.bitcoin_business_growth_percent_diff_days_ago(query)
# handler = PostgresHandler(connection)
#
# schema = schemas.bitcoin_business_growth_percent_diff_schema
# pipeline = pipelines.bitcoin_business_growth_percent_diff_days_ago(query)
# serializer = DataSerializer(schema)
# handler = MongoDBHandler(collection_name)
# rawData = handler.aggregate(pipeline)
# serializedData = serializer.serialize_many(rawData)
# return serializedData
# @router.get("/mangrove_by_country_agg")
# async def mangrove_by_country_agg(query: str):
# query = ast.literal_eval(query)
# db = client.baseddata
# collection_name = db["final__protected_mangroves_summary_stats_by_country_agg"]
# schema = schemas.mangrove_by_country_agg_schema
# pipeline = pipelines.mangrove_by_country_agg(query)
# serializer = DataSerializer(schema)
# handler = MongoDBHandler(collection_name)
# rawData = handler.aggregate(pipeline)
# serializedData = serializer.serialize_many(rawData)
# return serializedData
#
# @router.get("/bitcoin_business_growth_timeseries")
# async def bitcoin_business_growth_timeseries(query: str):
# query = ast.literal_eval(query)
# db = client.baseddata
# collection_name = db["final__bitcoin_business_growth_by_country"]
# schema = schemas.bitcoin_business_growth_timeseries_schema
# pipeline = pipelines.bitcoin_business_growth_timeseries(query)
# serializer = DataSerializer(schema)
# handler = MongoDBHandler(collection_name)
# rawData = handler.aggregate(pipeline)
# serializedData = serializer.serialize_many(rawData)
# return serializedData

View File

@ -0,0 +1,5 @@
from pymongo.mongo_client import MongoClient
import os

# Connection settings can be overridden through the environment so real
# credentials never need to be committed. The fallbacks reproduce the
# previously hard-coded values, so existing setups keep working.
# NOTE(review): rotate the fallback credentials and remove them once every
# deployment sets the MONGO_* variables.
client = MongoClient(
    host=[os.getenv("MONGO_HOST", "10.0.10.35:27017")],
    username=os.getenv("MONGO_USERNAME", "admin"),
    password=os.getenv("MONGO_PASSWORD", "1234"),
    authSource=os.getenv("MONGO_AUTH_SOURCE", "admin"),
)

View File

@ -1,6 +1,15 @@
from fastapi import FastAPI
from api.route import router
from pymongo.mongo_client import MongoClient
from routes.route import router
app = FastAPI()
client = MongoClient(
host=["10.0.10.35:27017"], username="admin", password="1234", authSource="admin"
)
try:
client.admin.command("ping")
print("Successfully pinged MongoDB deployment!")
except Exception as e:
print(e)
app.include_router(router)

View File

@ -0,0 +1,12 @@
class MongoDBHandler:
    """Small wrapper that forwards read operations to one collection object."""

    def __init__(self, collection):
        """Remember the collection every method delegates to."""
        self.collection = collection

    def find_limit(self, limit: int):
        """Return ``collection.find().limit(limit)``."""
        unfiltered = self.collection.find()
        return unfiltered.limit(limit)

    def find_one(self, query):
        """Return ``collection.find_one(query)``."""
        match = self.collection.find_one(query)
        return match

    def aggregate(self, query):
        """Return ``collection.aggregate(query)``; ``query`` is the pipeline."""
        outcome = self.collection.aggregate(query)
        return outcome

123
backend/models/pipelines.py Normal file
View File

@ -0,0 +1,123 @@
def mangrove_by_country_latest():
    """Return the aggregation pipeline that keeps only ``year == "2020"`` documents."""
    only_2020 = {"$match": {"year": "2020"}}
    return [only_2020]
def mangrove_by_country_agg(query):
    """Build the pipeline totalling ``total_n_pixels`` per year for one country.

    Args:
        query: Mapping with the key ``"country_with_parent"`` used as the
            ``$match`` value.

    Returns:
        A four-stage pipeline: match the country, group by
        (``country_with_parent``, ``year``) summing ``total_n_pixels`` into
        ``total_pixels``, flatten the group key into top-level fields, and
        sort by ``year`` ascending.
    """
    select_country = {
        "$match": {"country_with_parent": query["country_with_parent"]}
    }
    group_key = {"country_with_parent": "$country_with_parent", "year": "$year"}
    sum_per_year = {
        "$group": {"_id": group_key, "total_pixels": {"$sum": "$total_n_pixels"}}
    }
    flatten = {
        "$project": {
            "_id": 0,
            "country_with_parent": "$_id.country_with_parent",
            "year": "$_id.year",
            "total_pixels": 1,
        }
    }
    by_year = {"$sort": {"year": 1}}
    return [select_country, sum_per_year, flatten, by_year]
def bitcoin_business_growth_timeseries(query):
    """Build the pipeline returning one country's cumulative-value series.

    Args:
        query: Mapping with the keys ``"days_ago"`` (converted with
            ``int``) and ``"country_name"``.

    Returns:
        A pipeline that matches documents for the country whose
        ``days_ago`` is at most the requested value, sorts them by
        ``country_name`` then ``days_ago`` ascending, and projects
        ``country_name``, ``date`` and ``cumulative_value``.

    Raises:
        KeyError: If a required key is missing.
        ValueError: If ``days_ago`` is not an integer.
    """
    pipeline = [
        {
            "$match": {
                "days_ago": {"$lte": int(query["days_ago"])},
                # Force a plain string so a request cannot smuggle in an
                # operator document such as {"$ne": ""}.
                "country_name": str(query["country_name"]),
            }
        },
        # Sort BEFORE projecting: $project below does not keep days_ago, so
        # a sort placed after it had no days_ago field left to order by.
        {"$sort": {"country_name": 1, "days_ago": 1}},
        {
            "$project": {
                "country_name": "$country_name",
                "date": "$date",
                "cumulative_value": "$cumulative_value",
            }
        },
    ]
    return pipeline
def bitcoin_business_growth_percent_diff_days_ago(query):
    """Build the pipeline comparing each country's value across a window.

    Args:
        query: Mapping with the key ``"days_ago"`` (converted with ``int``).

    Returns:
        A four-stage pipeline: match ``days_ago <= N``; sort by
        ``country_name`` then ``days_ago`` ascending; group per country
        taking the ``$first``/``$last`` ``cumulative_value`` and the
        ``$min``/``$max`` ``date``; project the difference
        (``firstValue - lastValue``) and the percent difference relative to
        ``lastValue``. When ``lastValue`` is 0 the percent field is the
        string ``"new"`` (``firstValue > 0``) or ``"none"``.
    """

    def as_double(field):
        # Fresh dict per use so no sub-document is shared between stages.
        return {"$toDouble": field}

    def change():
        return {
            "$subtract": [as_double("$firstValue"), as_double("$lastValue")]
        }

    window = {"$match": {"days_ago": {"$lte": int(query["days_ago"])}}}
    ordering = {"$sort": {"country_name": 1, "days_ago": 1}}
    per_country = {
        "$group": {
            "_id": "$country_name",
            "firstValue": {"$first": "$cumulative_value"},
            "lastValue": {"$last": "$cumulative_value"},
            "firstDate": {"$min": "$date"},
            "lastDate": {"$max": "$date"},
        }
    }

    # Used when lastValue is 0 and a ratio cannot be computed.
    zero_baseline_label = {
        "$cond": {
            "if": {"$gt": [as_double("$firstValue"), 0]},
            "then": "new",
            "else": "none",
        }
    }
    ratio = {"$divide": [change(), as_double("$lastValue")]}
    rounded_percent = {"$round": [{"$multiply": [ratio, 100]}]}

    report = {
        "$project": {
            "country_name": "$_id",
            "first_value": "$firstValue",
            "last_value": "$lastValue",
            "difference": change(),
            "first_date": "$firstDate",
            "last_date": "$lastDate",
            "percent_difference": {
                "$cond": {
                    "if": {"$eq": [as_double("$lastValue"), 0]},
                    "then": zero_baseline_label,
                    "else": rounded_percent,
                }
            },
        }
    }
    return [window, ordering, per_country, report]
def bitcoin_business_growth_latest(query):
    """Build a pipeline that applies ``query["filter"]`` and sorts by date.

    Args:
        query: Mapping whose ``"filter"`` value is used unchanged as the
            ``$match`` document.

    Returns:
        A two-stage pipeline: ``$match`` with the given filter, then
        ``$sort`` by ``date`` ascending.
    """
    keep = {"$match": query["filter"]}
    oldest_first = {"$sort": {"date": 1}}
    return [keep, oldest_first]

61
backend/routes/route.py Normal file
View File

@ -0,0 +1,61 @@
from fastapi import APIRouter
from config.database import client
from models.mongodb_handler import MongoDBHandler
import models.pipelines as pipelines
import schema.schemas as schemas
from schema.schemas import DataSerializer
import ast
router = APIRouter()
@router.get("/mangrove_by_country_latest")
async def mangrove_by_country_latest():
    """Aggregate the mangrove summary collection and return serialized documents.

    Returns:
        The list produced by ``DataSerializer.serialize_many`` using
        ``schemas.mangrove_by_country_latest_schema``.
    """
    collection = client.baseddata[
        "final__protected_mangroves_summary_stats_by_country_agg"
    ]
    row_serializer = DataSerializer(schemas.mangrove_by_country_latest_schema)
    stages = pipelines.mangrove_by_country_latest()
    documents = MongoDBHandler(collection).aggregate(stages)
    return row_serializer.serialize_many(documents)
@router.get("/mangrove_by_country_agg")
async def mangrove_by_country_agg(query: str):
    """Serve per-year mangrove totals for the country named in ``query``.

    Args:
        query: Text of a Python dict literal; it is parsed with
            ``ast.literal_eval`` and handed to
            ``pipelines.mangrove_by_country_agg``.

    Returns:
        The aggregated documents serialized with
        ``schemas.mangrove_by_country_agg_schema``.

    Raises:
        HTTPException: Status 400 when ``query`` cannot be parsed, is not a
            dict, or lacks valid parameters.
    """
    # Imported here so the handler is self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    try:
        params = ast.literal_eval(query)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as exc:
        # literal_eval raises these for malformed input; the text comes
        # straight from the request, so answer 400 rather than crash.
        raise HTTPException(status_code=400, detail=f"Invalid query: {exc}") from exc
    if not isinstance(params, dict):
        raise HTTPException(
            status_code=400, detail="Invalid query: expected a dict literal"
        )
    try:
        pipeline = pipelines.mangrove_by_country_agg(params)
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Invalid query parameters: {exc!r}"
        ) from exc

    db = client.baseddata
    collection = db["final__protected_mangroves_summary_stats_by_country_agg"]
    serializer = DataSerializer(schemas.mangrove_by_country_agg_schema)
    handler = MongoDBHandler(collection)
    raw_data = handler.aggregate(pipeline)
    return serializer.serialize_many(raw_data)
@router.get("/bitcoin_business_growth_percent_diff")
async def bitcoin_business_growth_percent_diff(query: str):
    """Serve the per-country growth comparison selected by ``query``.

    Args:
        query: Text of a Python dict literal; it is parsed with
            ``ast.literal_eval`` and handed to
            ``pipelines.bitcoin_business_growth_percent_diff_days_ago``.

    Returns:
        The aggregated documents serialized with
        ``schemas.bitcoin_business_growth_percent_diff_schema``.

    Raises:
        HTTPException: Status 400 when ``query`` cannot be parsed, is not a
            dict, or lacks valid parameters.
    """
    # Imported here so the handler is self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    try:
        params = ast.literal_eval(query)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as exc:
        # literal_eval raises these for malformed input; the text comes
        # straight from the request, so answer 400 rather than crash.
        raise HTTPException(status_code=400, detail=f"Invalid query: {exc}") from exc
    if not isinstance(params, dict):
        raise HTTPException(
            status_code=400, detail="Invalid query: expected a dict literal"
        )
    try:
        pipeline = pipelines.bitcoin_business_growth_percent_diff_days_ago(params)
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Invalid query parameters: {exc!r}"
        ) from exc

    db = client.baseddata
    collection = db["final__bitcoin_business_growth_by_country"]
    serializer = DataSerializer(schemas.bitcoin_business_growth_percent_diff_schema)
    handler = MongoDBHandler(collection)
    raw_data = handler.aggregate(pipeline)
    return serializer.serialize_many(raw_data)
@router.get("/bitcoin_business_growth_timeseries")
async def bitcoin_business_growth_timeseries(query: str):
    """Serve one country's business-growth time series selected by ``query``.

    Args:
        query: Text of a Python dict literal; it is parsed with
            ``ast.literal_eval`` and handed to
            ``pipelines.bitcoin_business_growth_timeseries``.

    Returns:
        The aggregated documents serialized with
        ``schemas.bitcoin_business_growth_timeseries_schema``.

    Raises:
        HTTPException: Status 400 when ``query`` cannot be parsed, is not a
            dict, or lacks valid parameters.
    """
    # Imported here so the handler is self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    try:
        params = ast.literal_eval(query)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as exc:
        # literal_eval raises these for malformed input; the text comes
        # straight from the request, so answer 400 rather than crash.
        raise HTTPException(status_code=400, detail=f"Invalid query: {exc}") from exc
    if not isinstance(params, dict):
        raise HTTPException(
            status_code=400, detail="Invalid query: expected a dict literal"
        )
    try:
        pipeline = pipelines.bitcoin_business_growth_timeseries(params)
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Invalid query parameters: {exc!r}"
        ) from exc

    db = client.baseddata
    collection = db["final__bitcoin_business_growth_by_country"]
    serializer = DataSerializer(schemas.bitcoin_business_growth_timeseries_schema)
    handler = MongoDBHandler(collection)
    raw_data = handler.aggregate(pipeline)
    return serializer.serialize_many(raw_data)

View File

@ -1,3 +1,6 @@
def dt_to_date(datetime):
    """Return the calendar date of *datetime*.

    Args:
        datetime: A ``datetime.datetime`` (its ``.date()`` is returned) or a
            ``datetime.date`` (returned unchanged). Any other object is
            asked for ``.date()`` as before.

    Returns:
        The corresponding date value.

    Raises:
        AttributeError: If the value is neither a date nor an object with a
            ``date()`` method.
    """
    # Aliased because the parameter name shadows the datetime module here.
    import datetime as _dt

    if isinstance(datetime, _dt.datetime):
        return datetime.date()
    if isinstance(datetime, _dt.date):
        # Plain dates have no .date() method; they are already the answer.
        return datetime
    return datetime.date()
def mangrove_by_country_latest_schema(data):
return {
"country_with_parent": str(data["country_with_parent"]),
@ -17,8 +20,8 @@ def mangrove_by_country_agg_schema(data):
def bitcoin_business_growth_percent_diff_schema(data):
return {
"country_name": str(data["country_name"]),
"date_range": str(f'{data["first_date"]} to {data["last_date"]}'),
"first_value": int(data["first_value"]),
"date_range": str(f'{dt_to_date(data["first_date"])} to {dt_to_date(data["last_date"])}'),
"last_value": int(data["last_value"]),
"difference": int(data["difference"]),
"percent_difference": str(data["percent_difference"])
@ -27,7 +30,7 @@ def bitcoin_business_growth_percent_diff_schema(data):
def bitcoin_business_growth_timeseries_schema(data):
return {
"country_name": str(data["country_name"]),
"date": data["date"],
"date": dt_to_date(data["date"]),
"cumulative_value": int(data["cumulative_value"])
}
@ -36,7 +39,7 @@ class DataSerializer:
self.schema_func = schema_func
def serialize_one(self, data) -> dict:
return self.schema_func(dict( data ))
return self.schema_func(data)
def serialize_many(self, data_list) -> list:
return [self.serialize_one(data) for data in data_list]

View File

@ -16,7 +16,7 @@ The chart always reflects the countries selected in the table.
<br/>
{{< dropdown_filter id="days_ago_dropdown_filter" id_filter="days_ago" options="1 day:1,7 day:7,28 day:28,1 year:365,5 year:1826,10 year:3652,all time:10000" default_selection="7 day" targets="bitcoin-business-growth-chart bitcoin-business-growth-table" >}}
{{< table id="bitcoin-business-growth-table" endpoint="bitcoin_business_growth_percent_diff" headers="{'country_name': 'Country', 'date_range': 'Date Range', 'first_value': 'Previous #', 'last_value': 'Current #', 'difference': 'Diff', 'percent_difference': '% Diff'}" maxHeight="400px" sortable="true" valueId="country_name" selectableRows="multi" targets="bitcoin-business-growth-chart" defaultFirstSelected="true" >}}
{{< table id="bitcoin-business-growth-table" endpoint="bitcoin_business_growth_percent_diff" headers="{'country_name': 'Country', 'date_range': 'Date Range', 'last_value': 'Previous #', 'first_value': 'Current #', 'difference': 'Diff', 'percent_difference': '% Diff'}" maxHeight="400px" sortable="true" valueId="country_name" selectableRows="multi" targets="bitcoin-business-growth-chart" >}}
{{< chart id="bitcoin-business-growth-chart" endpoint="bitcoin_business_growth_timeseries" chartType="line" xAxisField="date" yAxisField="cumulative_value" scaleChart=true >}}

View File

@ -9,8 +9,6 @@ tags: ["Bitcoin", "Stats"]
script: "/js/mangrove-map.js"
---
{{< table id="mangrove_countries" endpoint="mangrove_by_country_latest" headers="{'country_with_parent': 'Country', 'original_pixels': '1996 Cover', 'total_n_pixels': '2020 Cover', 'cumulative_pixels_diff': 'Diff', 'cumulative_pct_diff': '% Diff'}" maxHeight="400px" sortable="true" valueId="country_with_parent" selectableRows="single" defaultFirstSelected="true" >}}
{{< table id="mangrove_countries" endpoint="mangrove_by_country_latest" headers="{'country_with_parent': 'Country', 'original_pixels': '1996 Cover', 'total_n_pixels': '2020 Cover', 'cumulative_pixels_diff': 'Diff', 'cumulative_pct_diff': '% Diff'}" maxHeight="400px" sortable="true" valueId="country_with_parent" selectableRows="single" >}}
{{< chart id="mangrove_countries" endpoint="mangrove_by_country_agg" chartType="bar" xAxisField="year" yAxisField="total_pixels" scaleChart=true >}}
{{< map id="map" style="https://tiles.semitamaps.com/styles/maptiler-basic/style.json">}}
{{< chart id="mangrove-country-timeseries-chart" endpoint="mangrove_country_timeseries" chartType="line" xAxisField="date" yAxisField="n_pixels" scaleChart=true >}}

View File

@ -7,8 +7,7 @@
sortable,
valueId,
selectableRows,
filterTargets,
defaultFirstSelected,
filterTargets
) {
async function fetchDataForTable(query) {
try {
@ -26,7 +25,9 @@
}
function generateTable(data) {
const jsonTableContainer = document.getElementById(`${id}--container`);
const jsonTableContainer = document.getElementById(
`${id}--container`,
);
jsonTableContainer.className = "jsonTableContainer";
jsonTableContainer.innerHTML = "";
jsonTableContainer.style.maxHeight = maxHeight;
@ -62,13 +63,6 @@
}
table.appendChild(thead);
table.appendChild(tbody);
jsonTableContainer.appendChild(table);
// sortable
if (sortable == "true") {
table.className = "sortable";
sorttable.makeSortable(document.getElementById(`${id}`));
}
if (selectableRows === "multi" || selectableRows === "single") {
const rows = table.getElementsByTagName("tr");
@ -82,7 +76,7 @@
filterId: valueId,
filterValue: this.value,
filterActions: ["selected"],
filterTargets: filterTargets,
filterTargets: filterTargets
},
});
document.dispatchEvent(event);
@ -92,7 +86,7 @@
filterId: valueId,
filterValue: this.value,
filterActions: ["deselected"],
filterTargets: filterTargets,
filterTargets: filterTargets
},
});
document.dispatchEvent(event);
@ -108,28 +102,20 @@
}
}
});
if (defaultFirstSelected == true) {
if (i == 1) {
rows[i].classList.add("selected");
const event = new CustomEvent("filterChange", {
detail: {
filterId: valueId,
filterValue: rows[i].value,
filterActions: ["selected"],
filterTargets: filterTargets,
},
});
document.dispatchEvent(event);
}
}
}
}
jsonTableContainer.appendChild(table);
// sortable
if (sortable == "true") {
table.className = "sortable";
sorttable.makeSortable(document.getElementById(`${id}`));
}
}
// listen for filter events for this target
document.addEventListener("filterChange", function (event) {
tableId = document.getElementById(id).id;
tableId = document.getElementById(id).id
if (event.detail.filterTargets.includes(tableId)) {
query = queryConstructor();
fetchDataForTable(query);

View File

@ -2,7 +2,7 @@
<div id = '{{ .Get "id" }}--container'>
<script>
document.addEventListener("DOMContentLoaded", function () {
createTable({{ .Get "endpoint" }}, {{ .Get "id" }}, {{ .Get "headers" | safeJS }}, {{ .Get "maxHeight" }}, {{ .Get "sortable" }}, {{ .Get "valueId" }}, {{ .Get "selectableRows" }}, '{{ .Get "targets" }}'.split(" "), {{ .Get "defaultFirstSelected" | safeJS }})
createTable({{ .Get "endpoint" }}, {{ .Get "id" }}, {{ .Get "headers" | safeJS }}, {{ .Get "maxHeight" }}, {{ .Get "sortable" }}, {{ .Get "valueId" }}, {{ .Get "selectableRows" }}, '{{ .Get "targets" }}'.split(" "))
});
</script>
</div>

View File

@ -1,22 +0,0 @@
[tool.poetry]
name = "baseddata-io"
version = "0.1.0"
description = ""
authors = ["Sam <samual.shop@proton.me>"]
readme = "README.md"
packages = [{include = "baseddata"}]
package-mode = false
[virtualenvs]
in-project = true
[tool.poetry.dependencies]
python = "^3.11"
fastapi = "^0.115.4"
uvicorn = "^0.32.0"
psycopg2 = "^2.9.10"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

View File

@ -2,18 +2,22 @@
pkgs.mkShell
{
nativeBuildInputs = with pkgs; [
python312Packages.flask
python312Packages.flask-cors
python312Packages.requests
python312Packages.pandas
python312Packages.orjson
hugo
python311
poetry
tmux
mongodb-tools
python312Packages.uvicorn
python312Packages.fastapi
python312Packages.pymongo
];
NIX_LD_LIBRARY_PATH=/run/current-system/sw/share/nix-ld/lib;
NIX_LD=/run/current-system/sw/share/nix-ld/lib/ld.so;
shellHook = ''
${pkgs.cowsay}/bin/cowsay "Welcome to the baseddata.io development environment!" | ${pkgs.lolcat}/bin/lolcat
export LD_LIBRARY_PATH=$NIX_LD_LIBRARY_PATH
source .env
source .venv/bin/activate
get_session=$(tmux list-session | grep "baseddata")
if [ -z "$get_session" ];

View File

@ -1,8 +1,7 @@
/* Charts */
.chart-container {
display: flex;
/* height: 600px; */
aspect-ratio: 1 / 1;
height: 300px;
}
.chart {

View File

@ -4,10 +4,6 @@
overflow-y: auto;
}
table.sortable th:not(.sorttable_sorted):not(.sorttable_sorted_reverse):not(.sorttable_nosort):after {
content: " \25B4\25BE"
}
table {
width: 100%;
border-collapse: collapse;