This could be improved, but does the trick for now. This is for a report configured as "daily" with resource IDs:
CREATE EXTERNAL TABLE IF NOT EXISTS `org_data`.`curv2` (
`discount_bundled_discount` string,
`discount_total_discount` string,
`identity_line_item_id` string,
`identity_time_interval` string,
`line_item_availability_zone` string,
`line_item_blended_cost` double,
`line_item_blended_rate` string,
`line_item_currency_code` string,
`line_item_legal_entity` string,
`line_item_line_item_description` string,
`line_item_line_item_type` string,
`line_item_net_unblended_cost` string,
`line_item_net_unblended_rate` string,
`line_item_normalization_factor` double,
`line_item_normalized_usage_amount` double,
`line_item_operation` string,
`line_item_product_code` string,
`line_item_resource_id` string,
`line_item_tax_type` string,
`line_item_unblended_cost` double,
`line_item_unblended_rate` string,
`line_item_usage_account_id` string,
`line_item_usage_account_name` string,
`line_item_usage_amount` double,
`line_item_usage_end_date` timestamp,
`line_item_usage_start_date` timestamp,
`line_item_usage_type` string,
`pricing_currency` string,
`pricing_lease_contract_length` string,
`pricing_offering_class` string,
`pricing_public_on_demand_cost` double,
`pricing_public_on_demand_rate` string,
`pricing_purchase_option` string,
`pricing_rate_code` string,
`pricing_rate_id` string,
`pricing_term` string,
`pricing_unit` string,
`product` map < string,
string >,
`product_comment` string,
`product_fee_code` string,
`product_fee_description` string,
`product_from_location` string,
`product_from_location_type` string,
`product_from_region_code` string,
`product_instance_family` string,
`product_instance_type` string,
`product_instancesku` string,
`product_location` string,
`product_location_type` string,
`product_operation` string,
`product_pricing_unit` string,
`product_product_family` string,
`product_region_code` string,
`product_servicecode` string,
`product_sku` string,
`product_to_location` string,
`product_to_location_type` string,
`product_to_region_code` string,
`product_usagetype` string,
`reservation_amortized_upfront_cost_for_usage` double,
`reservation_amortized_upfront_fee_for_billing_period` double,
`reservation_availability_zone` string,
`reservation_effective_cost` double,
`reservation_end_time` string,
`reservation_modification_status` string,
`reservation_net_amortized_upfront_cost_for_usage` string,
`reservation_net_amortized_upfront_fee_for_billing_period` string,
`reservation_net_effective_cost` string,
`reservation_net_recurring_fee_for_usage` string,
`reservation_net_unused_amortized_upfront_fee_for_billing_period` string,
`reservation_net_unused_recurring_fee` string,
`reservation_net_upfront_value` string,
`reservation_normalized_units_per_reservation` string,
`reservation_number_of_reservations` string,
`reservation_recurring_fee_for_usage` double,
`reservation_reservation_a_r_n` string,
`reservation_start_time` string,
`reservation_subscription_id` string,
`reservation_total_reserved_normalized_units` string,
`reservation_total_reserved_units` string,
`reservation_units_per_reservation` string,
`reservation_unused_amortized_upfront_fee_for_billing_period` double,
`reservation_unused_normalized_unit_quantity` double,
`reservation_unused_quantity` double,
`reservation_unused_recurring_fee` double,
`reservation_upfront_value` double,
`resource_tags` map < string,
string >,
`savings_plan_amortized_upfront_commitment_for_billing_period` double,
`savings_plan_end_time` string,
`savings_plan_instance_type_family` string,
`savings_plan_net_amortized_upfront_commitment_for_billing_period` string,
`savings_plan_net_recurring_commitment_for_billing_period` string,
`savings_plan_net_savings_plan_effective_cost` string,
`savings_plan_offering_type` string,
`savings_plan_payment_option` string,
`savings_plan_purchase_term` string,
`savings_plan_recurring_commitment_for_billing_period` double,
`savings_plan_region` string,
`savings_plan_savings_plan_a_r_n` string,
`savings_plan_savings_plan_effective_cost` double,
`savings_plan_savings_plan_rate` double,
`savings_plan_start_time` string,
`savings_plan_total_commitment_to_date` double,
`savings_plan_used_commitment` double,
`bill_bill_type` string,
`bill_billing_entity` string,
`bill_billing_period_end_date` timestamp,
`bill_billing_period_start_date` timestamp,
`bill_invoice_id` string,
`bill_invoicing_entity` string,
`bill_payer_account_id` string,
`bill_payer_account_name` string,
`cost_category` map < string,
string >,
`discount` map < string,
string >
) COMMENT "Cost and usage V2, hourly, with resource IDs"
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION 's3://path-to-your-curv2-export/data/'
TBLPROPERTIES (
'classification' = 'parquet',
'parquet.compression' = 'SNAPPY'
);
select
line_item_usage_start_date as start_date,
bill_payer_account_id as billing_account,
line_item_usage_account_id as usage_account,
line_item_usage_account_name as usage_account_name,
product_region_code as region_code,
line_item_product_code as product_code,
product_servicecode as service,
product_product_family as product_family,
line_item_usage_type as line_usage_type,
product_usagetype as prd_usage_type,
line_item_line_item_description as description,
line_item_line_item_type as line_type,
pricing_unit as unit_of_measure,
line_item_usage_amount as units_consumed,
line_item_unblended_rate as unit_price,
line_item_unblended_cost as cost,
resource_tags
from curv2
;