Chapter 3 - Schema Files

This tutorial is based on the schema_files.py example, which can be found in the TRAC GitHub Repository under examples/models/python.

schema_files.py
import typing as tp
import tracdap.rt.api as trac

import tutorial.using_data as using_data
import tutorial.schemas as schemas


class SchemaFilesModel(trac.TracModel):
    """Example TRAC model whose input and output schemas come from schema files.

    The input and output schemas are obtained with ``trac.load_schema`` from
    the ``tutorial.schemas`` package, and the calculation itself is delegated
    to ``using_data.calculate_profit_by_region``.
    """

    def define_attributes(self) -> tp.List[trac.TagUpdate]:
        """Return the attributes declared for this model."""

        description = trac.A("model_description", "A example model, for testing purposes")
        segment = trac.A("business_segment", "retail_products", categorical=True)
        classifiers = trac.A("classifiers", ["loans", "uk", "examples"], attr_type=trac.STRING)

        return trac.define_attributes(description, segment, classifiers)

    def define_parameters(self) -> tp.Dict[str, trac.ModelParameter]:
        """Return the three parameters the model reads in ``run_model``."""

        fx_rate = trac.P(
            "eur_usd_rate", trac.FLOAT,
            label="EUR/USD spot rate for reporting")

        weighting = trac.P(
            "default_weighting", trac.FLOAT,
            label="Weighting factor applied to the profit/loss of a defaulted loan")

        # The only parameter declared with a default value
        exclude_defaults = trac.P(
            "filter_defaults", trac.BOOLEAN,
            label="Exclude defaulted loans from the calculation",
            default_value=False)

        return trac.define_parameters(fx_rate, weighting, exclude_defaults)

    def define_inputs(self) -> tp.Dict[str, trac.ModelInputSchema]:
        """Return the single input, ``customer_loans``, using the schema in customer_loans.csv."""

        loans_schema = trac.load_schema(schemas, "customer_loans.csv")

        inputs = dict()
        inputs["customer_loans"] = trac.ModelInputSchema(loans_schema)

        return inputs

    def define_outputs(self) -> tp.Dict[str, trac.ModelOutputSchema]:
        """Return the single output, ``profit_by_region``, using the schema in profit_by_region.csv."""

        profit_schema = trac.load_schema(schemas, "profit_by_region.csv")

        outputs = dict()
        outputs["profit_by_region"] = trac.ModelOutputSchema(profit_schema)

        return outputs

    def run_model(self, ctx: trac.TracContext):
        """Read parameters and input from ``ctx``, compute the result and store it as the output.

        :param ctx: TRAC context used to fetch the parameters and the
            ``customer_loans`` table, and to save ``profit_by_region``.
        """

        # Parameters are fetched first, then the input dataset
        fx_rate = ctx.get_parameter("eur_usd_rate")
        weighting = ctx.get_parameter("default_weighting")
        exclude_defaults = ctx.get_parameter("filter_defaults")

        loans = ctx.get_pandas_table("customer_loans")

        # The calculation is shared with the using_data tutorial module
        result = using_data.calculate_profit_by_region(
            loans, fx_rate, weighting, exclude_defaults)

        ctx.put_pandas_table("profit_by_region", result)


if __name__ == "__main__":
    # When the file is run as a script, launch the model with the job config
    # and system config used by the using_data example.
    import tracdap.rt.launch as launch
    launch.launch_model(
        SchemaFilesModel,
        "config/using_data.yaml",
        "config/sys_config.yaml")
customer_loans.csv

field_name,field_type,label,categorical,business_key,format_code
id,STRING,Customer account ID,false,true,
loan_amount,DECIMAL,Principal loan amount,false,false,
total_pymnt,DECIMAL,Total amount repaid,false,false,
region,STRING,Customer home region,true,false,
loan_condition_cat,INTEGER,Loan condition category,false,false,

profit_by_region.csv

field_name,field_type,label,categorical,business_key,format_code
region,STRING,Customer home region,true,false,
gross_profit,DECIMAL,Total gross profit,false,false,

The system config and job config files are exactly the same as in the using_data example; they do not need to change when schemas are defined in schema files.