Transform the raw data into business data
The sales system API expects the traffic data in a specific format. You currently have all the necessary raw data, but you cannot send it as is, since the API would not understand that format.
You decide to implement a function to handle the transformation of raw data to sales system API payloads:
from robocorp.tasks import task
from RPA.HTTP import HTTP
from RPA.JSON import JSON
from RPA.Tables import Tables
# Library instances shared by the tasks and helper functions in this file.
http = HTTP()
json = JSON()
table = Tables()
# Path the traffic data JSON is downloaded to and later loaded from.
TRAFFIC_JSON_FILE_PATH = "output/traffic.json"
@task
def produce_traffic_data():
    """
    Inhuman Insurance, Inc. Artificial Intelligence System automation.
    Produces traffic data work items.

    Downloads the raw traffic data JSON, loads it as a table, filters and
    sorts it, picks one row per country, and converts those rows into
    payload dictionaries.
    """
    source_url = "https://github.com/robocorp/inhuman-insurance-inc/raw/main/RS_198.json"
    http.download(url=source_url, target_file=TRAFFIC_JSON_FILE_PATH, overwrite=True)

    raw_table = load_traffic_data_as_table()
    sorted_table = filter_and_sort_traffic_data(raw_table)
    latest_rows = get_latest_data_by_country(sorted_table)
    # The payloads are only built here; this task does not yet store or send them.
    payloads = create_work_item_payloads(latest_rows)
@task
def consume_traffic_data():
    """
    Inhuman Insurance, Inc. Artificial Intelligence System robot.
    Consumes traffic data work items.

    Currently a placeholder: it only prints "consume".
    """
    print("consume")
def load_traffic_data_as_table():
    """Load the downloaded traffic JSON file and return its records as a table.

    Reads the file at TRAFFIC_JSON_FILE_PATH and builds the table from the
    entries stored under the top-level "value" key.
    """
    traffic_json = json.load_json_from_file(TRAFFIC_JSON_FILE_PATH)
    records = traffic_json["value"]
    return table.create_table(records)
def filter_and_sort_traffic_data(data):
    """Filter the traffic table and sort it by year.

    Keeps rows whose "NumericValue" is below 5.0 and whose "Dim1" equals
    "BTSX", then sorts by "TimeDim". The library calls' results are not
    reassigned, so the table passed in is the one that gets modified, and
    that same object is returned.
    """
    rate_column, rate_limit = "NumericValue", 5.0
    gender_column, both_genders_code = "Dim1", "BTSX"
    year_column = "TimeDim"

    table.filter_table_by_column(data, rate_column, "<", rate_limit)
    table.filter_table_by_column(data, gender_column, "==", both_genders_code)
    # Third positional argument is the ascending flag in RPA.Tables;
    # False puts the most recent year first.
    table.sort_table_by_column(data, year_column, False)
    return data
def get_latest_data_by_country(data):
    """Return one row per country: the first row of each "SpatialDim" group.

    The first row is only the latest one if ``data`` is already sorted with
    the newest year first, as filter_and_sort_traffic_data() leaves it.
    Each selected row is popped from its group table.
    """
    country_key = "SpatialDim"
    groups = table.group_table_by_column(data, country_key)
    return [table.pop_table_row(group) for group in groups]
def create_work_item_payloads(traffic_data):
    """Convert traffic data rows into sales system API payloads.

    Each row is read by its "SpatialDim", "TimeDim" and "NumericValue" keys
    and turned into a dictionary with "country", "year" and "rate" keys.
    Returns the list of those dictionaries, in input order.
    """
    payloads = []
    for entry in traffic_data:
        payloads.append(
            {
                "country": entry["SpatialDim"],
                "year": entry["TimeDim"],
                "rate": entry["NumericValue"],
            }
        )
    return payloads
- The `create_work_item_payloads()` function loops over the list of traffic data - essentially rows.
- For each row, you create a new dictionary (a data structure that supports named keys).
- You append the dictionaries to a list that you then return from the function.
After running the robot, the log provides a small snippet of the payloads:
payloads = [{'country': 'VCT', 'year': 2011, 'rate': 3.69293}, {'country': 'SWE', 'year': 2019, 'rate': 3.13947}, {'country': 'SWE', 'year': 2018, 'rate': 3.61718}...
Looks good. But what's that smell? `SpatialDim`, `TimeDim`, and `NumericValue` appear in the code more than once. Duplication? In your code? Better deal with it immediately.
You decide to create variables for the data keys to avoid duplicating the values:
from robocorp.tasks import task
from RPA.HTTP import HTTP
from RPA.JSON import JSON
from RPA.Tables import Tables
# Library instances shared by the tasks and helper functions in this file.
http = HTTP()
json = JSON()
table = Tables()
# Path the traffic data JSON is downloaded to and later loaded from.
TRAFFIC_JSON_FILE_PATH = "output/traffic.json"
# JSON data keys
# Defined once here so the filtering, grouping and payload functions share them.
COUNTRY_KEY = "SpatialDim"
YEAR_KEY = "TimeDim"
RATE_KEY = "NumericValue"
GENDER_KEY = "Dim1"
@task
def produce_traffic_data():
    """
    Inhuman Insurance, Inc. Artificial Intelligence System automation.
    Produces traffic data work items.

    Downloads the raw traffic data JSON, loads it as a table, filters and
    sorts it, picks one row per country, and converts those rows into
    payload dictionaries.
    """
    source_url = "https://github.com/robocorp/inhuman-insurance-inc/raw/main/RS_198.json"
    http.download(url=source_url, target_file=TRAFFIC_JSON_FILE_PATH, overwrite=True)

    raw_table = load_traffic_data_as_table()
    sorted_table = filter_and_sort_traffic_data(raw_table)
    latest_rows = get_latest_data_by_country(sorted_table)
    # The payloads are only built here; this task does not yet store or send them.
    payloads = create_work_item_payloads(latest_rows)
@task
def consume_traffic_data():
    """
    Inhuman Insurance, Inc. Artificial Intelligence System robot.
    Consumes traffic data work items.

    Currently a placeholder: it only prints "consume".
    """
    print("consume")
def load_traffic_data_as_table():
    """Load the downloaded traffic JSON file and return its records as a table.

    Reads the file at TRAFFIC_JSON_FILE_PATH and builds the table from the
    entries stored under the top-level "value" key.
    """
    traffic_json = json.load_json_from_file(TRAFFIC_JSON_FILE_PATH)
    records = traffic_json["value"]
    return table.create_table(records)
def filter_and_sort_traffic_data(data):
    """Filter the traffic table and sort it by year.

    Keeps rows whose RATE_KEY value is below 5.0 and whose GENDER_KEY value
    equals "BTSX", then sorts by YEAR_KEY. The library calls' results are
    not reassigned, so the table passed in is the one that gets modified,
    and that same object is returned.
    """
    rate_limit = 5.0
    both_genders_code = "BTSX"

    table.filter_table_by_column(data, RATE_KEY, "<", rate_limit)
    table.filter_table_by_column(data, GENDER_KEY, "==", both_genders_code)
    # Third positional argument is the ascending flag in RPA.Tables;
    # False puts the most recent year first.
    table.sort_table_by_column(data, YEAR_KEY, False)
    return data
def get_latest_data_by_country(data):
    """Return one row per country: the first row of each COUNTRY_KEY group.

    The first row is only the latest one if ``data`` is already sorted with
    the newest year first, as filter_and_sort_traffic_data() leaves it.
    Each selected row is popped from its group table.
    """
    groups = table.group_table_by_column(data, COUNTRY_KEY)
    return [table.pop_table_row(group) for group in groups]
def create_work_item_payloads(traffic_data):
    """Convert traffic data rows into sales system API payloads.

    Each row is read by COUNTRY_KEY, YEAR_KEY and RATE_KEY and turned into a
    dictionary with "country", "year" and "rate" keys. Returns the list of
    those dictionaries, in input order.
    """
    payloads = []
    for entry in traffic_data:
        payloads.append(
            {
                "country": entry[COUNTRY_KEY],
                "year": entry[YEAR_KEY],
                "rate": entry[RATE_KEY],
            }
        )
    return payloads
That's better!