Class to interact with a Squirrels project from Python
This class is used to interact with a Squirrels project from Python. For example, you can create a SquirrelsProject object in Python (or Jupyter Notebook) as such:
Copy
from squirrels import SquirrelsProject

sqrl = SquirrelsProject(filepath="path/to/squirrels/project/")
And then call methods on the SquirrelsProject object to perform various operations. This class can be imported from the squirrels module.
Run all compiled queries and save each result as CSV (runtime only). If True and selected_model is specified, all upstream models of the selected model are compiled as well.
Use this method to deliberately close any database connections opened by the SquirrelsProject object. The database connections may still be opened again by other methods invoked on the SquirrelsProject object after this method is called.
Here are some common usage patterns for the SquirrelsProject class. It is assumed that the code is running in an async context (e.g. inside an async function or a Jupyter Notebook cell).
from squirrels import SquirrelsProject

sqrl = SquirrelsProject()

# Compile all models
await sqrl.compile()

# Compile a specific model and print its SQL
await sqrl.compile(selected_model="my_model")

# Compile with a specific test set
await sqrl.compile(test_set="test_set_1")

# Build the virtual data environment
await sqrl.build()

# Full refresh build (drop and rebuild everything)
await sqrl.build(full_refresh=True)

# Build only a specific model
await sqrl.build(select="my_model")
from squirrels import SquirrelsProject

sqrl = SquirrelsProject()

# Get dataset metadata
metadata = sqrl.dataset_metadata("sales_data")
print(f"Description: {metadata.target_model_config.description}")
print(f"Columns: {[col.name for col in metadata.target_model_config.columns]}")

# Query a dataset with parameter selections
result = await sqrl.dataset(
    "sales_data",
    selections={
        "date_range": ["2024-01-01", "2024-12-31"],
        "region": "north-america"
    }
)

# The result.df attribute is a polars DataFrame
print(result.df.head())

# Convert to pandas if needed
pandas_df = result.df.to_pandas()
print(pandas_df.head())
from squirrels import SquirrelsProject
from squirrels.auth import RegisteredUser, CustomUserFields
from pyconfigs.user import CustomUserFields

sqrl = SquirrelsProject()

# Note: CustomUserFields must be extended in your project's user.py
custom_fields = CustomUserFields()  # Use your extended class

# Create a registered user with custom fields
user = RegisteredUser(username="john_doe", custom_fields=custom_fields)

# Query dataset as authenticated user
result = await sqrl.dataset(
    "protected_data",
    selections={"year": "2024"},
    user=user
)

# Querying protected datasets with code also allows for bypassing authentication
result = await sqrl.dataset(
    "protected_data",
    selections={"year": "2024"},
    require_auth=False
)

# Access the polars DataFrame
print(result.df)
from squirrels import SquirrelsProject
from squirrels.dashboards import PngDashboard, HtmlDashboard

sqrl = SquirrelsProject()

# Get a PNG dashboard
png_dashboard = await sqrl.dashboard(
    "sales_overview",
    selections={"quarter": "Q1"},
    dashboard_type=PngDashboard
)
png_dashboard
Render an HTML dashboard in a Jupyter Notebook:
Copy
from squirrels import SquirrelsProject
from squirrels.dashboards import HtmlDashboard

sqrl = SquirrelsProject()

# Get an HTML dashboard
html_dashboard = await sqrl.dashboard(
    "interactive_report",
    selections={"year": "2024"},
    dashboard_type=HtmlDashboard
)
html_dashboard
from squirrels import SquirrelsProject

sqrl = SquirrelsProject()

# Query models using custom SQL
result = await sqrl.query_models(
    "SELECT * FROM my_model WHERE amount > 1000",
    selections={"date_param": "2024-01-01"}
)

# Access the polars DataFrame
print(result.df)

# Get compiled query for a specific model
compiled = await sqrl.get_compiled_model_query(
    "my_model",
    selections={"region": "west"}
)
print(f"Language: {compiled.language}")
print(f"Query:\n{compiled.definition}")
from squirrels import SquirrelsProject

sqrl = SquirrelsProject()

# Get a seed as a polars LazyFrame
seed_lf = sqrl.seed("lookup_table")

# Collect and print the data
df = seed_lf.collect()
print(df)
from squirrels import SquirrelsProject

sqrl = SquirrelsProject()

# List all data models
models = await sqrl.get_all_data_models()
for model in models:
    print(f"{model.name} ({model.model_type}): queryable={model.is_queryable}")

# Get data lineage
lineage = await sqrl.get_all_data_lineage()
for relation in lineage:
    print(f"{relation.source.name} ({relation.source.type}) -> "
          f"{relation.target.name} ({relation.target.type}) [{relation.type}]")
from squirrels import SquirrelsProject

sqrl = SquirrelsProject()

# Perform operations...

# Close database connections associated with the project when done
sqrl.close()