baseddata.io/backend/api/pipelines.py

224 lines
7.6 KiB
Python

def parse_int(args):
    """Convert ``args`` to an ``int``.

    If ``int()`` rejects the value with a ``ValueError``, a message naming the
    offending value is printed and the same exception is re-raised for the
    caller to handle. Any other exception raised by ``int()`` propagates
    without a message.
    """
    try:
        number = int(args)
    except ValueError:
        print(f"{args} cannot be cast to int")
        raise
    else:
        return number
def mangrove_by_country_latest():
    """Return the SQL text for the per-country mangrove summary of year '2020'.

    The query selects every column from
    ``models_final.final__protected_mangroves_summary_stats_by_country_agg``,
    keeps only rows whose ``year`` equals the hard-coded string ``'2020'``, and
    sorts them by ``cumulative_pixels_diff`` in descending order.
    """
    # Leading and trailing empty entries reproduce the newline that opens and
    # closes the query text.
    sql_lines = (
        "",
        "select * from models_final.final__protected_mangroves_summary_stats_by_country_agg",
        "where year = '2020'",
        "order by cumulative_pixels_diff desc",
        "",
    )
    return "\n".join(sql_lines)
def mangrove_country_timeseries(args):
    """Build the SQL for one country's mangrove pixel counts per year.

    Args:
        args: Mapping holding the key ``"country_with_parent"``, the value to
            match against the ``country_with_parent`` column.

    Returns:
        SQL text selecting ``year`` and ``total_n_pixels`` from
        ``models_final.final__protected_mangroves_summary_stats_by_country_agg``
        for that country, ordered by ``year``.

    Raises:
        KeyError: If ``"country_with_parent"`` is missing from ``args``.
    """
    # The value is spliced into a single-quoted SQL literal. Left as-is, a
    # quote in the input ends the literal early: names containing an
    # apostrophe produce broken SQL and crafted input can inject SQL.
    # Backslashes are escaped first so the backslash added in front of each
    # quote is not itself doubled.
    # NOTE(review): backslash escaping is what BigQuery-style string literals
    # expect; that dialect is inferred from the `safe_divide` call used by
    # another query in this file -- confirm against the engine that runs this.
    country_name = (
        str(args["country_with_parent"]).replace("\\", "\\\\").replace("'", "\\'")
    )
    return (
        "\n"
        "select year, total_n_pixels from models_final.final__protected_mangroves_summary_stats_by_country_agg\n"
        f"where country_with_parent = '{country_name}'\n"
        "order by year\n"
    )
def bitcoin_business_growth_timeseries(args):
    """Build the SQL for one country's bitcoin business growth rows.

    Args:
        args: Mapping holding ``"days_ago"`` (converted with ``parse_int`` and
            used as the inclusive upper bound on the ``days_ago`` column) and
            ``"country_name"`` (matched against the ``country_name`` column).

    Returns:
        SQL text selecting every column from
        ``models_final.final__bitcoin_business_growth_by_country`` for that
        country and window, ordered by ``date``.

    Raises:
        KeyError: If either key is missing from ``args``.
        ValueError: Propagated from ``parse_int`` when ``days_ago`` cannot be
            converted to an integer.
    """
    days_ago = parse_int(args["days_ago"])
    # The country name is spliced into a single-quoted SQL literal. Left
    # as-is, a quote in the input ends the literal early: names containing an
    # apostrophe produce broken SQL and crafted input can inject SQL.
    # Backslashes are escaped first so the backslash added in front of each
    # quote is not itself doubled.
    # NOTE(review): backslash escaping is what BigQuery-style string literals
    # expect; that dialect is inferred from the `safe_divide` call used by
    # another query in this file -- confirm against the engine that runs this.
    country_name = (
        str(args["country_name"]).replace("\\", "\\\\").replace("'", "\\'")
    )
    return (
        "\n"
        "select * from models_final.final__bitcoin_business_growth_by_country\n"
        f"where days_ago <= {days_ago} and country_name = '{country_name}'\n"
        "order by date\n"
    )
def bitcoin_business_growth_percent_diff_days_ago(args):
    """Build the SQL comparing each country's value across a look-back window.

    ``args["days_ago"]`` is converted with ``parse_int`` and used as the
    inclusive upper bound on the ``days_ago`` column. Within that window the
    query takes, per country, the ``cumulative_value``/``date`` at the largest
    ``days_ago`` as ``first_value``/``first_date`` and those at the smallest
    ``days_ago`` as ``last_value``/``last_date``. It then reports their
    difference and the percentage difference (``safe_divide``, rounded to two
    decimals). Only rows with ``days_ago = 1`` are returned, ordered by
    ``difference`` descending.
    """
    window_days = parse_int(args["days_ago"])
    # Plain (non-f) template: the only placeholder is {days_ago}, filled below.
    sql_template = """
with
filtered_data as (
select country_name, date, days_ago, cumulative_value
from models_final.final__bitcoin_business_growth_by_country
where days_ago <= {days_ago}
order by country_name, days_ago desc
),
first_and_last_values as (
select
country_name,
date,
days_ago,
cumulative_value,
first_value(cumulative_value) over (
partition by country_name order by days_ago desc
) as first_value,
first_value(date) over (
partition by country_name order by days_ago desc
) as first_date,
first_value(cumulative_value) over (
partition by country_name order by days_ago
) as last_value,
first_value(date) over (
partition by country_name order by days_ago
) as last_date
from filtered_data
),
diff as (
select
country_name,
date,
first_date,
last_date,
days_ago,
cumulative_value,
first_value,
last_value,
last_value - first_value as difference,
round(
100 * safe_divide((last_value - first_value), first_value), 2
) as percent_difference
from first_and_last_values
)
select *
from diff
where days_ago = 1
order by difference desc
"""
    return sql_template.format(days_ago=window_days)
# def bitcoin_business_growth_timeseries(query):
# pipeline = [
# {
# "$match": {
# "days_ago": {"$lte": int(query["days_ago"])},
# "country_name": query["country_name"],
# }
# },
# {
# "$project": {
# "country_name": "$country_name",
# "date": "$date",
# "cumulative_value": "$cumulative_value",
# }
# },
# {"$sort": {"country_name": 1, "days_ago": 1}},
# ]
# return pipeline
# def mangrove_by_country_latest():
# pipeline = [
# {
# "$match": {"year": "2020"},
# },
# ]
# return pipeline
#
#
# def mangrove_by_country_agg(query):
# pipeline = [
# {"$match": {"country_with_parent": query["country_with_parent"]}},
# {
# "$group": {
# "_id": {"country_with_parent": "$country_with_parent", "year": "$year"},
# "total_pixels": {"$sum": "$total_n_pixels"},
# }
# },
# {
# "$project": {
# "_id": 0,
# "country_with_parent": "$_id.country_with_parent",
# "year": "$_id.year",
# "total_pixels": 1,
# }
# },
# {"$sort": {"year": 1}},
# ]
# return pipeline
#
#
# def bitcoin_business_growth_timeseries(query):
# pipeline = [
# {
# "$match": {
# "days_ago": {"$lte": int(query["days_ago"])},
# "country_name": query["country_name"],
# }
# },
# {
# "$project": {
# "country_name": "$country_name",
# "date": "$date",
# "cumulative_value": "$cumulative_value",
# }
# },
# {"$sort": {"country_name": 1, "days_ago": 1}},
# ]
# return pipeline
#
#
# def bitcoin_business_growth_percent_diff_days_ago(query):
#     pipeline = [
#         {"$match": {"days_ago": {"$lte": int(query["days_ago"])}}},
#         {"$sort": {"country_name": 1, "days_ago": 1}},
#         {
#             "$group": {
#                 "_id": "$country_name",
#                 "firstvalue": {"$first": "$cumulative_value"},
#                 "lastvalue": {"$last": "$cumulative_value"},
#                 "firstdate": {"$min": "$date"},
#                 "lastdate": {"$max": "$date"},
#             }
#         },
#         {
#             "$project": {
#                 "country_name": "$_id",
#                 "first_value": "$firstvalue",
#                 "last_value": "$lastvalue",
#                 "difference": {
#                     "$subtract": [
#                         {"$todouble": "$firstvalue"},
#                         {"$todouble": "$lastvalue"},
#                     ]
#                 },
#                 "first_date": "$firstdate",
#                 "last_date": "$lastdate",
#                 "percent_difference": {
#                     "$cond": {
#                         "if": {"$eq": [{"$todouble": "$lastvalue"}, 0]},
#                         "then": {
#                             "$cond": {
#                                 "if": {"$gt": [{"$todouble": "$firstvalue"}, 0]},
#                                 "then": "new",
#                                 "else": "none",
#                             }
#                         },
#                         "else": {
#                             "$round": [
#                                 {
#                                     "$multiply": [
#                                         {
#                                             "$divide": [
#                                                 {
#                                                     "$subtract": [
#                                                         {"$todouble": "$firstvalue"},
#                                                         {"$todouble": "$lastvalue"},
#                                                     ]
#                                                 },
#                                                 {"$todouble": "$lastvalue"},
#                                             ]
#                                         },
#                                         100,
#                                     ]
#                                 }
#                             ]
#                         },
#                     }
#                 },
#             }
#         },
#     ]
#     return pipeline
#
#
# def bitcoin_business_growth_latest(query):
# pipeline = [
# {
# "$match": query["filter"],
# },
# {"$sort": {"date": 1}},
# ]
# return pipeline