Time Series & Feature Engineering Questions

rpleanerrpleaner Member Posts: 5 Contributor I
edited December 2018 in Help

Hi guys,

   I am trying to solve a sales forecasting problem. There is a monthly sales table (attributes: time, sales) and a consumer record table (attributes: order time, A (id), B, C, D). Assuming that sales are related to the consumers' attributes, how should I create proper features as input to build a time series model to make predictions?

There are many ways to create inputs by counting instances along different dimensions, for example:

 var1 =count A when B=b1,

var2 =count A when B=b2

var3 = count A when B=b1,C=c1,D=d1

var4 = count A when B=b1,C=c1,D=d1

 

How do I select the proper inputs for time series prediction from these variables? Is this the right way to create features?

Does anybody have any ideas? I would appreciate any tips! Would you mind looking at this? :p @Thomas_Ott

 

 

 

Tagged:

Answers

  • Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn

    @rpleaner do you have a sample process to share?

  • rpleanerrpleaner Member Posts: 5 Contributor I

    I tried to build similar input tables using the Generate Data operators in RapidMiner, as follows.

    Supposing that customers' attributes are related to monthly sales, how do I create proper input for time series forecasting of next month's sales?

     

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!-- Sample process: generates a customer table and a sales table, then delivers
         (result 1) sales summed per month and
         (result 2) customer counts per combination of att1..att5 after joining both tables. -->
    <process version="5.3.015">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
        <process expanded="true">
          <!-- Customer branch: generated nominal data gets an id, which is renamed to customer_id
               so that it can serve as the join key below. -->
          <operator activated="true" class="generate_nominal_data" compatibility="5.3.015" expanded="true" height="60" name="Generate Nominal Data" width="90" x="45" y="210"/>
          <operator activated="true" class="generate_id" compatibility="5.3.015" expanded="true" height="76" name="Generate ID" width="90" x="179" y="210"/>
          <operator activated="true" class="rename" compatibility="5.3.015" expanded="true" height="76" name="Rename" width="90" x="313" y="210">
            <parameter key="old_name" value="id"/>
            <parameter key="new_name" value="customer_id"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- Set Role targets the attribute "label"; no target role is given here,
               so the operator's own default applies. -->
          <operator activated="true" class="set_role" compatibility="5.3.015" expanded="true" height="76" name="Set Role" width="90" x="447" y="210">
            <parameter key="attribute_name" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Sales branch: the date is converted to a nominal "yyyy.MM" value (year and month),
               which is what "monthly sales" groups by. -->
          <operator activated="true" class="generate_sales_data" compatibility="5.3.015" expanded="true" height="60" name="Generate Sales Data" width="90" x="45" y="30"/>
          <operator activated="true" class="date_to_nominal" compatibility="5.3.015" expanded="true" height="76" name="Date to Nominal" width="90" x="179" y="30">
            <parameter key="attribute_name" value="date"/>
            <parameter key="date_format" value="yyyy.MM"/>
          </operator>
          <!-- New attribute "sales" = amount * single_price.
               The name must match the key used by the "monthly sales" aggregation exactly
               (no trailing whitespace). -->
          <operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes" width="90" x="313" y="30">
            <list key="function_descriptions">
              <parameter key="sales" value="amount*single_price"/>
            </list>
          </operator>
          <!-- Left join on customer_id: the left input comes from the "original" port of
               Generate Attributes, the right input is the customer branch. -->
          <operator activated="true" class="join" compatibility="5.3.015" expanded="true" height="76" name="Join" width="90" x="581" y="120">
            <parameter key="join_type" value="left"/>
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="customer_id" value="customer_id"/>
            </list>
          </operator>
          <!-- Counts customer_id for every combination of att1..att5 in the joined data. -->
          <operator activated="true" class="aggregate" compatibility="5.3.015" expanded="true" height="76" name="custom" width="90" x="715" y="120">
            <list key="aggregation_attributes">
              <parameter key="customer_id" value="count"/>
            </list>
            <parameter key="group_by_attributes" value="|att5|att1|att2|att3|att4"/>
          </operator>
          <!-- Sums the "sales" attribute per nominal date value (one row per year.month). -->
          <operator activated="true" class="aggregate" compatibility="5.3.015" expanded="true" height="76" name="monthly sales" width="90" x="514" y="30">
            <list key="aggregation_attributes">
              <parameter key="sales" value="sum"/>
            </list>
            <parameter key="group_by_attributes" value="|date"/>
          </operator>
          <connect from_op="Generate Nominal Data" from_port="output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Join" to_port="right"/>
          <connect from_op="Generate Sales Data" from_port="output" to_op="Date to Nominal" to_port="example set input"/>
          <connect from_op="Date to Nominal" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="monthly sales" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="original" to_op="Join" to_port="left"/>
          <connect from_op="Join" from_port="join" to_op="custom" to_port="example set input"/>
          <connect from_op="custom" from_port="example set output" to_port="result 2"/>
          <connect from_op="monthly sales" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>

     @Thomas_Ott 

  • Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn

    @rpleaner you can try something like this:

     

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Suggested process: same data preparation as in the question, followed by windowing,
         a sliding window validation around an H2O generalized linear model, and scoring.
         Results: (1) sales summed per month, (2) validation performance, (3) scored windowed data. -->
    <process version="8.1.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
        <process expanded="true">
          <!-- Customer branch: generated nominal data gets an id, which is renamed to customer_id
               so that it can serve as the join key below. -->
          <operator activated="true" class="generate_nominal_data" compatibility="8.1.001" expanded="true" height="68" name="Generate Nominal Data" width="90" x="45" y="210"/>
          <operator activated="true" class="generate_id" compatibility="8.1.001" expanded="true" height="82" name="Generate ID" width="90" x="179" y="210"/>
          <operator activated="true" class="rename" compatibility="8.1.001" expanded="true" height="82" name="Rename" width="90" x="313" y="210">
            <parameter key="old_name" value="id"/>
            <parameter key="new_name" value="customer_id"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- Set Role targets the attribute "label"; no target role is given here,
               so the operator's own default applies. -->
          <operator activated="true" class="set_role" compatibility="8.1.001" expanded="true" height="82" name="Set Role" width="90" x="447" y="210">
            <parameter key="attribute_name" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Sales branch: the date is converted to a nominal "yyyy.MM" value (year and month),
               which is what "monthly sales" groups by. -->
          <operator activated="true" class="generate_sales_data" compatibility="8.1.001" expanded="true" height="68" name="Generate Sales Data" width="90" x="45" y="30"/>
          <operator activated="true" class="date_to_nominal" compatibility="8.1.001" expanded="true" height="82" name="Date to Nominal" width="90" x="179" y="30">
            <parameter key="attribute_name" value="date"/>
            <parameter key="date_format" value="yyyy.MM"/>
          </operator>
          <!-- New attribute "sales" = amount * single_price.
               The name must match the key used by the "monthly sales" aggregation exactly
               (no trailing whitespace). -->
          <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="30">
            <list key="function_descriptions">
              <parameter key="sales" value="amount*single_price"/>
            </list>
          </operator>
          <!-- Left join on customer_id: the left input comes from the "original" port of
               Generate Attributes, the right input is the customer branch. -->
          <operator activated="true" class="join" compatibility="8.1.001" expanded="true" height="82" name="Join" width="90" x="581" y="120">
            <parameter key="join_type" value="left"/>
            <list key="key_attributes">
              <parameter key="customer_id" value="customer_id"/>
            </list>
          </operator>
          <!-- Training windows over the joined data: window size 10, with a label created
               from "amount" at horizon 5. -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing" width="90" x="715" y="136">
            <parameter key="window_size" value="10"/>
            <parameter key="create_label" value="true"/>
            <parameter key="label_attribute" value="amount"/>
            <parameter key="horizon" value="5"/>
          </operator>
          <!-- Scoring windows: same window size, no label parameters set; fed from the
               "original" port of the first Windowing operator. -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing (2)" width="90" x="849" y="238">
            <parameter key="window_size" value="10"/>
          </operator>
          <!-- Deactivated and not connected: the customer count aggregation from the question. -->
          <operator activated="false" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="custom" width="90" x="782" y="340">
            <list key="aggregation_attributes">
              <parameter key="customer_id" value="count"/>
            </list>
            <parameter key="group_by_attributes" value="|att5|att1|att2|att3|att4"/>
          </operator>
          <!-- Sums the "sales" attribute per nominal date value (one row per year.month). -->
          <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="monthly sales" width="90" x="581" y="34">
            <list key="aggregation_attributes">
              <parameter key="sales" value="sum"/>
            </list>
            <parameter key="group_by_attributes" value="|date"/>
          </operator>
          <!-- Sliding window validation: training and test windows of width 15, step size 1,
               horizon 5 (the same horizon as the first Windowing operator). -->
          <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="145" name="Validation" width="90" x="983" y="85">
            <parameter key="training_window_width" value="15"/>
            <parameter key="training_window_step_size" value="1"/>
            <parameter key="test_window_width" value="15"/>
            <parameter key="horizon" value="5"/>
            <process expanded="true">
              <!-- Training subprocess: fits the generalized linear model on the training window. -->
              <operator activated="true" class="h2o:generalized_linear_model" compatibility="7.6.001" expanded="true" height="124" name="Generalized Linear Model" width="90" x="112" y="34">
                <list key="beta_constraints"/>
                <list key="expert_parameters"/>
              </operator>
              <connect from_port="training" to_op="Generalized Linear Model" to_port="training set"/>
              <connect from_op="Generalized Linear Model" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <!-- Testing subprocess: applies the model to the test window and measures
                   regression performance. -->
              <operator activated="true" class="apply_model" compatibility="8.1.001" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_regression" compatibility="8.1.001" expanded="true" height="82" name="Performance" width="90" x="313" y="34"/>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
              <portSpacing port="sink_averagable 3" spacing="0"/>
            </process>
          </operator>
          <!-- Scores the output of Windowing (2) with the model delivered by Validation. -->
          <operator activated="true" class="apply_model" compatibility="8.1.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="1117" y="289">
            <list key="application_parameters"/>
          </operator>
          <connect from_op="Generate Nominal Data" from_port="output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Join" to_port="right"/>
          <connect from_op="Generate Sales Data" from_port="output" to_op="Date to Nominal" to_port="example set input"/>
          <connect from_op="Date to Nominal" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="monthly sales" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="original" to_op="Join" to_port="left"/>
          <connect from_op="Join" from_port="join" to_op="Windowing" to_port="example set input"/>
          <connect from_op="Windowing" from_port="example set output" to_op="Validation" to_port="training"/>
          <connect from_op="Windowing" from_port="original" to_op="Windowing (2)" to_port="example set input"/>
          <connect from_op="Windowing (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
          <connect from_op="monthly sales" from_port="example set output" to_port="result 1"/>
          <connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
          <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
          <connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 3"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <portSpacing port="sink_result 4" spacing="0"/>
        </process>
      </operator>
    </process>
Sign In or Register to comment.