Options

How to sum up Sales by each year's months?

DanielHuberDanielHuber Member Posts: 1 Learner I
edited November 2018 in Help

Hello everybody,

 

i don't find the solution how to sum up quantities of sales by each year, with a time span of three to four years.

 

e.g. There are sales data from January 1st, 2014 to December 31st, 2017 in the format MMM, D YYYY

 

Now I want to change the Date format to JJJJ, MM in fact to be able to sum up on this operator.

 

 

My result should be the following:

 

Year, Month                sum (Year, Month)

2014, 01                     50 tons

...

2014, 12                     45 tons

2015, 01                     59 tons

...

2015, 12                     41 tons

2016, 01                     39 tons

...

2016, 12                     20 tons

2017, 01                     53 tons

...

2017, 12                     32 tons

 

How can i change my Date Format to achieve this? Is it aso possible to change to a german date format as expected above?

 

Best regards,

Daniel

Answers

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,508 RM Data Scientist

    Dear Daniel,

     

    do i see this correctly that you have data like:

    1/1/2017 10

    2/1/2017 20

    3/1/2017 30

    ...

    so daily sales volumes, and what to do up to monthly? If yes, use Date to Numerical to extract the Month with respect to epoch and run Aggregate with group by date afterwards. Attached is an example process to demonstrate this.

     

    Best,
    Martin

    <?xml version="1.0" encoding="UTF-8"?><process version="8.2.000">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="8.2.000" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" breakpoints="after" class="subprocess" compatibility="8.2.000" expanded="true" height="82" name="Subprocess" width="90" x="45" y="85">
    <process expanded="true">
    <operator activated="true" class="generate_data" compatibility="8.2.000" expanded="true" height="68" name="Generate Data" width="90" x="45" y="34"/>
    <operator activated="true" class="generate_id" compatibility="8.2.000" expanded="true" height="82" name="Generate ID" width="90" x="179" y="34"/>
    <operator activated="true" class="generate_attributes" compatibility="8.2.000" expanded="true" height="82" name="Generate Attributes" width="90" x="447" y="34">
    <list key="function_descriptions">
    <parameter key="date" value="date_add(date_now(),-1*id,DATE_UNIT_DAY)"/>
    </list>
    </operator>
    <operator activated="true" class="rename" compatibility="8.2.000" expanded="true" height="82" name="Rename" width="90" x="581" y="34">
    <parameter key="old_name" value="att1"/>
    <parameter key="new_name" value="sales"/>
    <list key="rename_additional_attributes"/>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="8.2.000" expanded="true" height="82" name="Select Attributes" width="90" x="715" y="34">
    <parameter key="attribute_filter_type" value="subset"/>
    <parameter key="attributes" value="sales|date"/>
    <parameter key="include_special_attributes" value="true"/>
    </operator>
    <connect from_op="Generate Data" from_port="output" to_op="Generate ID" to_port="example set input"/>
    <connect from_op="Generate ID" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
    <connect from_op="Rename" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    <description align="center" color="transparent" colored="false" width="126">Create Dummy Data</description>
    </operator>
    <operator activated="true" class="date_to_numerical" compatibility="8.2.000" expanded="true" height="82" name="Date to Numerical" width="90" x="179" y="85">
    <parameter key="attribute_name" value="date"/>
    <parameter key="time_unit" value="month"/>
    <parameter key="week_relative_to" value="epoch"/>
    <parameter key="keep_old_attribute" value="true"/>
    </operator>
    <operator activated="true" class="multiply" compatibility="8.2.000" expanded="true" height="103" name="Multiply" width="90" x="380" y="85"/>
    <operator activated="true" class="remove_duplicates" compatibility="8.2.000" expanded="true" height="103" name="Remove Duplicates" width="90" x="782" y="187">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="date_month"/>
    </operator>
    <operator activated="true" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate" width="90" x="514" y="34">
    <list key="aggregation_attributes">
    <parameter key="sales" value="sum"/>
    </list>
    <parameter key="group_by_attributes" value="date_month"/>
    </operator>
    <operator activated="true" class="concurrency:join" compatibility="8.2.000" expanded="true" height="82" name="Join" width="90" x="916" y="34">
    <parameter key="use_id_attribute_as_key" value="false"/>
    <list key="key_attributes">
    <parameter key="date_month" value="date_month"/>
    </list>
    </operator>
    <connect from_op="Subprocess" from_port="out 1" to_op="Date to Numerical" to_port="example set input"/>
    <connect from_op="Date to Numerical" from_port="example set output" to_op="Multiply" to_port="input"/>
    <connect from_op="Multiply" from_port="output 1" to_op="Aggregate" to_port="example set input"/>
    <connect from_op="Multiply" from_port="output 2" to_op="Remove Duplicates" to_port="example set input"/>
    <connect from_op="Remove Duplicates" from_port="example set output" to_op="Join" to_port="right"/>
    <connect from_op="Aggregate" from_port="example set output" to_op="Join" to_port="left"/>
    <connect from_op="Join" from_port="join" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <description align="center" color="yellow" colored="false" height="370" resized="true" width="388" x="674" y="10">For Better Readability of the date</description>
    </process>
    </operator>
    </process>
    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
Sign In or Register to comment.