🥳 RAPIDMINER 9.9 IS OUT!!! 🥳
The updates in 9.9 power advanced use cases and offer productivity enhancements for users who prefer to code.
CLICK HERE TO DOWNLOAD
How to perform aggregation on year and half year data of customer


Hello , |
form the attached file I want to generate the below table but from generation of data I have a Configuration file that contains one column "BIN" contains month, quarter,half-year and year, another column "ENABLE" that contain Yes or No. I used the filter operator to calculate values only on Yes attribute and store in the respective columns.
Customer Daily-AvgAmt | Daily-STDAmt | Daily-AvgVolume | Daily-STDVolume |
above for daily same column added for Monthly/Yearly
I want to calculate the "number of transaction", "avg amount"," std devation", "Daily Limit " of the customer as per selected value in the config(Year/Half year)
<?xml version="1.0" encoding="UTF-8"?><process version="9.5.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="9.5.001" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve Config_ProfileOnBins" width="90" x="45" y="34">
<parameter key="repository_entry" value="../Data/Config_ProfileOnBins"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="9.5.001" expanded="true" height="103" name="Filter Examples (22)" width="90" x="179" y="34">
<parameter key="parameter_expression" value=""/>
<parameter key="condition_class" value="custom_filters"/>
<parameter key="invert_filter" value="false"/>
<list key="filters_list">
<parameter key="filters_entry_key" value="ENABLE.equals.Yes"/>
</list>
<parameter key="filters_logic_and" value="true"/>
<parameter key="filters_check_metadata" value="true"/>
</operator>
<operator activated="true" class="extract_macro" compatibility="9.5.001" expanded="true" height="68" name="Extract Macro" width="90" x="313" y="34">
<parameter key="macro" value="number_of_bins"/>
<parameter key="macro_type" value="number_of_examples"/>
<parameter key="statistics" value="average"/>
<parameter key="attribute_name" value=""/>
<list key="additional_macros"/>
</operator>
<operator activated="false" class="utility:create_exampleset" compatibility="9.5.001" expanded="true" height="68" name="Create ExampleSet" width="90" x="45" y="595">
<parameter key="generator_type" value="numeric series"/>
<parameter key="number_of_examples" value="1000"/>
<parameter key="use_stepsize" value="true"/>
<list key="function_descriptions"/>
<parameter key="add_id_attribute" value="false"/>
<list key="numeric_series_configuration">
<parameter key="ProfileID" value="linear.1\.0.1\.0"/>
</list>
<list key="date_series_configuration"/>
<list key="date_series_configuration (interval)"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="column_separator" value=","/>
<parameter key="parse_all_as_nominal" value="false"/>
<parameter key="decimal_point_character" value="."/>
<parameter key="trim_attribute_names" value="true"/>
</operator>
<operator activated="false" class="generate_attributes" compatibility="9.5.001" expanded="true" height="82" name="Generate Attributes (5)" width="90" x="246" y="595">
<list key="function_descriptions">
<parameter key="NEW_ID" value="concat("CBP",suffix(concat("0000",str(ProfileID)),5))"/>
</list>
<parameter key="keep_all" value="true"/>
</operator>
<operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve dummy Data" width="90" x="45" y="238">
<parameter key="repository_entry" value="../Data/dummy Data"/>
</operator>
<operator activated="true" breakpoints="after" class="subprocess" compatibility="9.5.001" expanded="true" height="82" name="Subprocess (2)" width="90" x="179" y="238">
<process expanded="true">
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical" width="90" x="45" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="year"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="year"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="era"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Extract Year</description>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (2)" width="90" x="179" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="quarter"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="year"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="epoch"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Exract Quarter of Year</description>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (3)" width="90" x="313" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="month"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="year"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="epoch"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Extract Month of Year</description>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (4)" width="90" x="447" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="week"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="month"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="epoch"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Extract Week of Month</description>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (6)" width="90" x="581" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="half year"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="month"/>
<parameter key="week_relative_to" value="month"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="epoch"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Extract Half of Year</description>
</operator>
<operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (15)" width="90" x="715" y="34">
<parameter key="attribute_name" value="InputDateTime"/>
<parameter key="time_unit" value="day"/>
<parameter key="millisecond_relative_to" value="second"/>
<parameter key="second_relative_to" value="minute"/>
<parameter key="minute_relative_to" value="hour"/>
<parameter key="hour_relative_to" value="day"/>
<parameter key="day_relative_to" value="week"/>
<parameter key="week_relative_to" value="month"/>
<parameter key="month_relative_to" value="year"/>
<parameter key="quarter_relative_to" value="year"/>
<parameter key="half_year_relative_to" value="year"/>
<parameter key="year_relative_to" value="epoch"/>
<parameter key="keep_old_attribute" value="true"/>
<description align="center" color="transparent" colored="false" width="126">Extract Day of Month</description>
</operator>
<operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename Extracted Features" width="90" x="849" y="34">
<parameter key="old_name" value="InputDateTime_year"/>
<parameter key="new_name" value="YEAR"/>
<list key="rename_additional_attributes">
<parameter key="InputDateTime_quarter" value="QUARTER"/>
<parameter key="InputDateTime_month" value="MONTH"/>
<parameter key="InputDateTime_week" value="WEEK"/>
<parameter key="InputDateTime_half year" value="HALF YEAR"/>
<parameter key="InputDateTime_day" value="DAY"/>
</list>
</operator>
<connect from_port="in 1" to_op="Date to Numerical" to_port="example set input"/>
<connect from_op="Date to Numerical" from_port="example set output" to_op="Date to Numerical (2)" to_port="example set input"/>
<connect from_op="Date to Numerical (2)" from_port="example set output" to_op="Date to Numerical (3)" to_port="example set input"/>
<connect from_op="Date to Numerical (3)" from_port="example set output" to_op="Date to Numerical (4)" to_port="example set input"/>
<connect from_op="Date to Numerical (4)" from_port="example set output" to_op="Date to Numerical (6)" to_port="example set input"/>
<connect from_op="Date to Numerical (6)" from_port="example set output" to_op="Date to Numerical (15)" to_port="example set input"/>
<connect from_op="Date to Numerical (15)" from_port="example set output" to_op="Rename Extracted Features" to_port="example set input"/>
<connect from_op="Rename Extracted Features" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
<description align="center" color="transparent" colored="false" width="126">Data Preparation</description>
</operator>
<operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename" width="90" x="313" y="238">
<parameter key="old_name" value="Amount"/>
<parameter key="new_name" value="AMOUNT"/>
<list key="rename_additional_attributes"/>
</operator>
<operator activated="true" class="concurrency:loop_values" compatibility="9.5.001" expanded="true" height="103" name="Loop Values" width="90" x="514" y="187">
<parameter key="attribute" value="%{number_of_bins}"/>
<parameter key="iteration_macro" value="loop_bin"/>
<parameter key="reuse_results" value="false"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate (3)" width="90" x="246" y="238">
<parameter key="use_default_aggregation" value="false"/>
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="default_aggregation_function" value="average"/>
<list key="aggregation_attributes">
<parameter key="AMOUNT" value="average"/>
<parameter key="AMOUNT" value="minimum"/>
<parameter key="AMOUNT" value="maximum"/>
<parameter key="AMOUNT" value="standard_deviation"/>
<parameter key="AMOUNT" value="median"/>
<parameter key="AMOUNT" value="sum"/>
</list>
<parameter key="group_by_attributes" value="Customer|%{loop_bin}"/>
<parameter key="count_all_combinations" value="false"/>
<parameter key="only_distinct" value="false"/>
<parameter key="ignore_missings" value="true"/>
</operator>
<operator activated="true" breakpoints="after" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate (256)" width="90" x="380" y="238">
<parameter key="use_default_aggregation" value="false"/>
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="default_aggregation_function" value="average"/>
<list key="aggregation_attributes">
<parameter key="FREQUENCY" value="average"/>
<parameter key="FREQUENCY" value="minimum"/>
<parameter key="FREQUENCY" value="maximum"/>
<parameter key="FREQUENCY" value="standard_deviation"/>
<parameter key="FREQUENCY" value="median"/>
<parameter key="FREQUENCY" value="sum"/>
<parameter key="TOTAL_AMOUNT_PER_BIN" value="average"/>
<parameter key="TOTAL_AMOUNT_PER_BIN" value="minimum"/>
<parameter key="TOTAL_AMOUNT_PER_BIN" value="maximum"/>
<parameter key="TOTAL_AMOUNT_PER_BIN" value="standard_deviation"/>
</list>
<parameter key="group_by_attributes" value="%{loop_bin}|Customer"/>
<parameter key="count_all_combinations" value="false"/>
<parameter key="only_distinct" value="false"/>
<parameter key="ignore_missings" value="true"/>
</operator>
<operator activated="true" class="filter_example_range" compatibility="9.5.001" expanded="true" height="82" name="Filter Example Range (2)" width="90" x="112" y="34">
<parameter key="first_example" value="%{iteration}"/>
<parameter key="last_example" value="%{iteration}"/>
<parameter key="invert_filter" value="false"/>
</operator>
<operator activated="true" class="extract_macro" compatibility="9.5.001" expanded="true" height="68" name="Extract Macro (42)" width="90" x="246" y="34">
<parameter key="macro" value="key_attribute_type1"/>
<parameter key="macro_type" value="data_value"/>
<parameter key="statistics" value="average"/>
<parameter key="attribute_name" value="BIN"/>
<parameter key="example_index" value="1"/>
<list key="additional_macros"/>
</operator>
<connect from_port="input 1" to_op="Filter Example Range (2)" to_port="example set input"/>
<connect from_port="input 2" to_op="Aggregate (3)" to_port="example set input"/>
<connect from_op="Aggregate (3)" from_port="example set output" to_op="Aggregate (256)" to_port="example set input"/>
<connect from_op="Filter Example Range (2)" from_port="example set output" to_op="Extract Macro (42)" to_port="example set"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="source_input 3" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve Config_ProfileOnBins" from_port="output" to_op="Filter Examples (22)" to_port="example set input"/>
<connect from_op="Filter Examples (22)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
<connect from_op="Extract Macro" from_port="example set" to_op="Loop Values" to_port="input 1"/>
<connect from_op="Create ExampleSet" from_port="output" to_op="Generate Attributes (5)" to_port="example set input"/>
<connect from_op="Retrieve dummy Data" from_port="output" to_op="Subprocess (2)" to_port="in 1"/>
<connect from_op="Subprocess (2)" from_port="out 1" to_op="Rename" to_port="example set input"/>
<connect from_op="Rename" from_port="example set output" to_op="Loop Values" to_port="input 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
</process>
Tagged:
0
Best Answer
-
hbajpai Member Posts: 102  
 Unicorn
sgnarkhede2016,
Thanks for sharing the process and xlsx files with us. I think I understood your requirements and have created a process which aggregates half yearly and yearly values.
Take a look, and let me know what you think.<?xml version="1.0" encoding="UTF-8"?><process version="9.6.000"> <context> <input/> <output/> <macros/> </context> <operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process"> <parameter key="logverbosity" value="init"/> <parameter key="random_seed" value="2001"/> <parameter key="send_mail" value="never"/> <parameter key="notification_email" value=""/> <parameter key="process_duration_for_mail" value="30"/> <parameter key="encoding" value="SYSTEM"/> <process expanded="true"> <operator activated="true" class="read_excel" compatibility="9.6.000" expanded="true" height="68" name="Read Excel (2)" width="90" x="45" y="85"> <parameter key="excel_file" value="C:\Users\harsh\Downloads\dummy Data.xlsx"/> <parameter key="sheet_selection" value="sheet number"/> <parameter key="sheet_number" value="1"/> <parameter key="imported_cell_range" value="A1"/> <parameter key="encoding" value="SYSTEM"/> <parameter key="first_row_as_names" value="true"/> <list key="annotations"/> <parameter key="date_format" value=""/> <parameter key="time_zone" value="SYSTEM"/> <parameter key="locale" value="English (United States)"/> <parameter key="read_all_values_as_polynominal" value="false"/> <list key="data_set_meta_data_information"> <parameter key="0" value="Customer.true.polynominal.attribute"/> <parameter key="1" value="InputDateTime.true.date_time.attribute"/> <parameter key="2" value="Amount.true.integer.attribute"/> </list> <parameter key="read_not_matching_values_as_missings" value="false"/> <parameter key="datamanagement" value="double_array"/> <parameter key="data_management" value="auto"/> <description align="center" color="transparent" colored="false" width="126">Dummy Data file</description> </operator> <operator activated="true" class="rename" compatibility="9.6.000" expanded="true" height="82" name="Rename" width="90" x="179" y="85"> <parameter key="old_name" value="Amount"/> <parameter key="new_name" value="AMOUNT"/> <list key="rename_additional_attributes"/> </operator> <operator activated="true" class="generate_attributes" compatibility="9.6.000" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="85"> <list key="function_descriptions"> <parameter key="Date" value="date_str_custom(InputDateTime,"yyyy-MM-dd")"/> </list> <parameter key="keep_all" value="true"/> </operator> <operator activated="true" class="blending:pivot" compatibility="9.6.000" expanded="true" height="82" name="Pivot" width="90" x="447" y="85"> <parameter key="group_by_attributes" value="Customer|Date"/> <parameter key="column_grouping_attribute" value=""/> <list key="aggregation_attributes"> <parameter key="Customer" value="count"/> <parameter key="AMOUNT" value="sum"/> </list> <parameter key="use_default_aggregation" value="false"/> <parameter key="default_aggregation_function" value="first"/> </operator> <operator activated="true" class="rename" compatibility="9.6.000" expanded="true" height="82" name="Rename (2)" width="90" x="581" y="85"> <parameter key="old_name" value="sum(AMOUNT)"/> <parameter key="new_name" value="Customer Daily Amt"/> <list key="rename_additional_attributes"> <parameter key="count(Customer)" value="Daily Volume"/> </list> </operator> <operator activated="true" class="generate_attributes" compatibility="9.6.000" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="715" y="85"> <list key="function_descriptions"> <parameter key="Year_Group" value="str(date_get(date_parse_custom(Date,"yyyy-MM-dd"),DATE_UNIT_YEAR))"/> <parameter key="Half_Year" value="if ( date_get(date_parse_custom(Date,"yyyy-MM-dd"),DATE_UNIT_MONTH) <= 6, str(1), str(2))"/> <parameter key="Half_Year_Group" value="concat(Year_Group,"-",Half_Year)"/> </list> <parameter key="keep_all" value="true"/> </operator> <operator activated="true" class="multiply" compatibility="9.6.000" expanded="true" height="103" name="Multiply" width="90" x="849" y="85"/> <operator activated="true" class="blending:pivot" compatibility="9.6.000" expanded="true" height="82" name="Half Yearly" width="90" x="983" y="34"> <parameter key="group_by_attributes" value="Customer|Half_Year_Group"/> <parameter key="column_grouping_attribute" value=""/> <list key="aggregation_attributes"> <parameter key="Customer Daily Amt" value="average"/> <parameter key="Customer Daily Amt" value="standard deviation"/> <parameter key="Daily Volume" value="average"/> <parameter key="Daily Volume" value="standard deviation"/> </list> <parameter key="use_default_aggregation" value="false"/> <parameter key="default_aggregation_function" value="first"/> </operator> <operator activated="true" class="blending:pivot" compatibility="9.6.000" expanded="true" height="82" name="Yearly" width="90" x="983" y="187"> <parameter key="group_by_attributes" value="Customer|Year_Group"/> <parameter key="column_grouping_attribute" value=""/> <list key="aggregation_attributes"> <parameter key="Customer Daily Amt" value="average"/> <parameter key="Customer Daily Amt" value="standard deviation"/> <parameter key="Daily Volume" value="average"/> <parameter key="Daily Volume" value="standard deviation"/> </list> <parameter key="use_default_aggregation" value="false"/> <parameter key="default_aggregation_function" value="first"/> </operator> <connect from_op="Read Excel (2)" from_port="output" to_op="Rename" to_port="example set input"/> <connect from_op="Rename" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/> <connect from_op="Generate Attributes" from_port="example set output" to_op="Pivot" to_port="input"/> <connect from_op="Pivot" from_port="output" to_op="Rename (2)" to_port="example set input"/> <connect from_op="Rename (2)" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/> <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Multiply" to_port="input"/> <connect from_op="Multiply" from_port="output 1" to_op="Half Yearly" to_port="input"/> <connect from_op="Multiply" from_port="output 2" to_op="Yearly" to_port="input"/> <connect from_op="Half Yearly" from_port="output" to_port="result 1"/> <connect from_op="Yearly" from_port="output" to_port="result 2"/> <portSpacing port="source_input 1" spacing="0"/> <portSpacing port="sink_result 1" spacing="0"/> <portSpacing port="sink_result 2" spacing="147"/> <portSpacing port="sink_result 3" spacing="0"/> </process> </operator> </process>
Best,
Harshit6