Time Lags Using the Windowing for Training and Sliding Window Validation Operator

pix123pix123 Member Posts: 27 Contributor I
edited December 2018 in Help

Hi there,

 

I am new to RapidMiner and am having trouble running a model that predicts appliance usage from a time series. The data is recorded at 10-minute intervals. The process also includes some additional pre-processing steps.

 

The process fails to run and I can't figure out how to fix or improve it. I would welcome any suggestions.

 

Thanks.

 

 

 

Current process...

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.001">
  <!--
    RapidMiner process posted with the question. Operators in connection order:
    Retrieve, Nominal to Date, Local Outlier Factor (LOF), Filter Examples, Set Role,
    Windowing for Training, Sliding W. Validation (SVM inside), Select Attributes.
    Results: result 1 = Select Attributes output, result 2 = model, result 3 = performance.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Loads the data set from the repository entry named below. -->
      <operator activated="true" class="retrieve" compatibility="8.1.001" expanded="true" height="68" name="Retrieve Energy Set Version 2 " width="90" x="45" y="136">
        <parameter key="repository_entry" value="//Algorithm Module - College/Data Science Project/Energy Set Version 2 "/>
      </operator>
      <!--
        Converts the nominal "date" attribute to date_time using the pattern MM/dd/yyyy h:mm.
        NOTE(review): in Java-style date patterns "h" is the 12-hour field and there is no am/pm
        marker here; the process in the answer parses with dd/MM/yyyy HH:mm. TODO confirm the
        pattern against the actual values in the data.
      -->
      <operator activated="true" class="nominal_to_date" compatibility="8.1.001" expanded="true" height="82" name="Nominal to Date" width="90" x="179" y="136">
        <parameter key="attribute_name" value="date"/>
        <parameter key="date_type" value="date_time"/>
        <parameter key="date_format" value="MM/dd/yyyy h:mm"/>
      </operator>
      <!-- Outlier scoring from the Anomaly Detection extension; no parameters are set, so defaults apply. -->
      <operator activated="true" class="anomalydetection:Local Outlier Factor (LOF)" compatibility="2.4.001" expanded="true" height="103" name="Local Outlier Factor (LOF)" width="90" x="313" y="34"/>
      <!--
        Condition is outlier > 1.145 with invert_filter=true, i.e. the examples matching the
        condition are the ones removed.
      -->
      <operator activated="true" class="filter_examples" compatibility="8.1.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="34">
        <parameter key="invert_filter" value="true"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="outlier.gt.1\.145"/>
        </list>
      </operator>
      <!-- Gives "date" the id role. -->
      <operator activated="true" class="set_role" compatibility="8.1.001" expanded="true" height="82" name="Set Role" width="90" x="581" y="34">
        <parameter key="attribute_name" value="date"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <!--
        Windows the series with window_size=144 and step_size=6 and creates a label from
        "Appliances". With the 10-minute sampling described in the post, 144 rows would span
        24 hours. No "horizon" parameter is set on this operator, so its default is used.
        NOTE(review): every attribute reaching this operator is presumably windowed, not only
        "Appliances"; confirm, since that multiplies the number of generated columns.
      -->
      <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing for Training" width="90" x="715" y="34">
        <parameter key="window_size" value="144"/>
        <parameter key="step_size" value="6"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="Appliances"/>
      </operator>
      <!--
        Sliding window validation: training_window_width=144, training_window_step_size=1,
        horizon=5. First subprocess trains, second subprocess tests.
      -->
      <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Sliding W. Validation" width="90" x="581" y="187">
        <parameter key="training_window_width" value="144"/>
        <parameter key="training_window_step_size" value="1"/>
        <parameter key="horizon" value="5"/>
        <!-- Training side: SVM with default parameters; its estimated performance is passed on through port "through 1". -->
        <process expanded="true">
          <operator activated="true" class="support_vector_machine" compatibility="8.1.001" expanded="true" height="124" name="SVM" width="90" x="246" y="85"/>
          <connect from_port="training" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <connect from_op="SVM" from_port="estimated performance" to_port="through 1"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <portSpacing port="sink_through 2" spacing="0"/>
        </process>
        <!-- Testing side: apply the model, reduce the attribute set, compute regression performance. -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="8.1.001" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
            <list key="application_parameters"/>
          </operator>
          <!--
            NOTE(review): the subset below names "Date" with a capital D, while Nominal to Date
            and Set Role refer to the attribute as "date". TODO confirm the intended name.
          -->
          <operator activated="true" class="select_attributes" compatibility="8.1.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="246" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Date|label|prediction(label)"/>
          </operator>
          <operator activated="true" class="performance_regression" compatibility="8.1.001" expanded="true" height="82" name="Performance" width="90" x="380" y="136"/>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="source_through 2" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!--
        Applied to the "training" output of the validation operator.
        NOTE(review): "Measure-0" and "Date" are not named anywhere else in this process (the
        windowed series is "Appliances" and the id attribute is "date"). TODO confirm these
        match the attribute names that actually leave the windowing operator.
      -->
      <operator activated="true" class="select_attributes" compatibility="8.1.001" expanded="true" height="82" name="Select Attributes" width="90" x="916" y="136">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="label|Measure-0|Date"/>
      </operator>
      <connect from_op="Retrieve Energy Set Version 2 " from_port="output" to_op="Nominal to Date" to_port="example set input"/>
      <connect from_op="Nominal to Date" from_port="example set output" to_op="Local Outlier Factor (LOF)" to_port="example set"/>
      <connect from_op="Local Outlier Factor (LOF)" from_port="example set" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Windowing for Training" to_port="example set input"/>
      <connect from_op="Windowing for Training" from_port="example set output" to_op="Sliding W. Validation" to_port="training"/>
      <connect from_op="Sliding W. Validation" from_port="model" to_port="result 2"/>
      <connect from_op="Sliding W. Validation" from_port="training" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Sliding W. Validation" from_port="averagable 1" to_port="result 3"/>
      <connect from_op="Select Attributes" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="252"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="21"/>
    </process>
  </operator>
</process>

Answers

  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 901   Unicorn

    Hi @pix123,

     

    Can you share your dataset so that we can reproduce the error?

     

    Regards,

     

    Lionel 

  • pix123pix123 Member Posts: 27 Contributor I

    Thanks Lionel, please find attached.

  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 901   Unicorn

    Hi again @pix123,

     

    Here you can find a process to forecast "Appliances".

    1. In this version of the process the window size is 20. You can change this setting by:

     - setting this parameter inside the Set Predictions_Params subprocess operator,

    and

     - modifying the parameters of the Select Attributes and Rename operators inside the Loop operator.

     

    2. To improve this process, you can test different models (the Deep Learning model was chosen arbitrarily)

    and you can use the Optimize Parameters operator to determine the combination of parameters that gives the best performance for your model.

     

    The process : 

    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
    <!--
      Forecasting process for the "Appliances" series: read the CSV, parse the date, set the
      WindowSize / horizon / futureDay macros, keep only Appliances, remove high LOF scores,
      window the series, validate a Deep Learning model with sliding windows, then loop
      futureDay times, feeding each prediction back in as the newest window value.
      Results: result 1 = appended loop outputs, result 2 = validation performance.
    -->
    <context>
    <input/>
    <output/>
    <!--
      NOTE(review): the context macros below (futureMonths, horizon, windowSize) are separate
      from the WindowSize and futureDay macros that Set Predictions_Params defines and the
      operators read. Only "horizon" shares a name, and it is set to 5 inside the process.
      Confirm whether these context entries are still needed.
    -->
    <macros>
    <macro>
    <key>futureMonths</key>
    <value>15</value>
    </macro>
    <macro>
    <key>horizon</key>
    <value>1</value>
    </macro>
    <macro>
    <key>windowSize</key>
    <value>6</value>
    </macro>
    </macros>
    </context>
    <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
    <!--
      csv_file points at a path on the answer author's machine: replace it with the location
      of your own copy of the data set before running.
    -->
    <operator activated="true" class="read_csv" compatibility="8.1.003" expanded="true" height="68" name="Read CSV" width="90" x="45" y="136">
    <parameter key="csv_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Energy_forecast\Dataset April .csv"/>
    <parameter key="column_separators" value=","/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations">
    <parameter key="0" value="Name"/>
    </list>
    <parameter key="encoding" value="windows-1252"/>
    <list key="data_set_meta_data_information">
    <parameter key="0" value="date.true.polynominal.attribute"/>
    <parameter key="1" value="Appliances.true.integer.attribute"/>
    <parameter key="2" value="lights.true.integer.attribute"/>
    <parameter key="3" value="T1.true.real.attribute"/>
    <parameter key="4" value="RH_1.true.real.attribute"/>
    <parameter key="5" value="T2.true.real.attribute"/>
    <parameter key="6" value="RH_2.true.real.attribute"/>
    <parameter key="7" value="T3.true.real.attribute"/>
    <parameter key="8" value="RH_3.true.real.attribute"/>
    <parameter key="9" value="T4.true.real.attribute"/>
    <parameter key="10" value="RH_4.true.real.attribute"/>
    <parameter key="11" value="T5.true.real.attribute"/>
    <parameter key="12" value="RH_5.true.real.attribute"/>
    <parameter key="13" value="T6.true.real.attribute"/>
    <parameter key="14" value="RH_6.true.real.attribute"/>
    <parameter key="15" value="T7.true.real.attribute"/>
    <parameter key="16" value="RH_7.true.real.attribute"/>
    <parameter key="17" value="T8.true.real.attribute"/>
    <parameter key="18" value="RH_8.true.real.attribute"/>
    <parameter key="19" value="T9.true.real.attribute"/>
    <parameter key="20" value="RH_9.true.real.attribute"/>
    <parameter key="21" value="T_out.true.real.attribute"/>
    <parameter key="22" value="Press_mm_hg.true.real.attribute"/>
    <parameter key="23" value="RH_out.true.real.attribute"/>
    <parameter key="24" value="Windspeed.true.real.attribute"/>
    <parameter key="25" value="Visibility.true.real.attribute"/>
    <parameter key="26" value="Tdewpoint.true.real.attribute"/>
    <parameter key="27" value="rv1.true.real.attribute"/>
    <parameter key="28" value="rv2.true.real.attribute"/>
    </list>
    </operator>
    <!-- Parses the "date" column with the pattern dd/MM/yyyy HH:mm. -->
    <operator activated="true" class="nominal_to_date" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Date" width="90" x="179" y="136">
    <parameter key="attribute_name" value="date"/>
    <parameter key="date_format" value="dd/MM/yyyy HH:mm"/>
    </operator>
    <!--
      Central place for the forecast settings: WindowSize=20, horizon=5, futureDay=96.
      futureDay is the iteration count of the Loop operator, which advances the date by
      10 minutes per iteration.
    -->
    <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="82" name="Set Predictions_Params" width="90" x="313" y="136">
    <process expanded="true">
    <operator activated="true" class="set_macro" compatibility="8.1.003" expanded="true" height="82" name="Set Window_Size" width="90" x="45" y="34">
    <parameter key="macro" value="WindowSize"/>
    <parameter key="value" value="20"/>
    </operator>
    <operator activated="true" class="set_macro" compatibility="8.1.003" expanded="true" height="82" name="Set Horizon" width="90" x="179" y="34">
    <parameter key="macro" value="horizon"/>
    <parameter key="value" value="5"/>
    </operator>
    <operator activated="true" class="set_macro" compatibility="8.1.003" expanded="true" height="82" name="Set Future_Day" width="90" x="313" y="34">
    <parameter key="macro" value="futureDay"/>
    <parameter key="value" value="96"/>
    </operator>
    <connect from_port="in 1" to_op="Set Window_Size" to_port="through 1"/>
    <connect from_op="Set Window_Size" from_port="through 1" to_op="Set Horizon" to_port="through 1"/>
    <connect from_op="Set Horizon" from_port="through 1" to_op="Set Future_Day" to_port="through 1"/>
    <connect from_op="Set Future_Day" from_port="through 1" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="source_in 2" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <!-- Gives "date" the id role. -->
    <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role" width="90" x="447" y="136">
    <parameter key="attribute_name" value="date"/>
    <parameter key="target_role" value="id"/>
    <list key="set_additional_roles"/>
    </operator>
    <!-- Keeps only the "Appliances" attribute as the series to forecast. -->
    <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes" width="90" x="581" y="136">
    <parameter key="attribute_filter_type" value="subset"/>
    <parameter key="attributes" value="Appliances"/>
    </operator>
    <!-- LOF outlier scoring with default parameters. -->
    <operator activated="true" class="detect_outlier_lof" compatibility="8.1.003" expanded="true" height="82" name="Detect Outlier (LOF)" width="90" x="715" y="85"/>
    <!--
      Removes the examples whose outlier score exceeds 1.145. invert_filter is needed for
      that: without it the condition below keeps only the high-score examples and discards
      the rest of the series. The process in the question applies the same condition with
      invert_filter=true.
    -->
    <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples" width="90" x="849" y="85">
    <parameter key="invert_filter" value="true"/>
    <list key="filters_list">
    <parameter key="filters_entry_key" value="outlier.gt.1\.145"/>
    </list>
    </operator>
    <!-- Builds the training windows: window size and horizon come from the macros, label is created from "Appliances". -->
    <operator activated="true" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Training" width="90" x="1184" y="136">
    <parameter key="window_size" value="%{WindowSize}"/>
    <parameter key="create_label" value="true"/>
    <parameter key="label_attribute" value="Appliances"/>
    <parameter key="horizon" value="%{horizon}"/>
    </operator>
    <!--
      Sliding window validation with training_window_width=WindowSize, test_window_width=10
      and horizon taken from the macro. First subprocess trains, second subprocess tests.
    -->
    <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="1318" y="34">
    <parameter key="training_window_width" value="%{WindowSize}"/>
    <parameter key="test_window_width" value="10"/>
    <parameter key="horizon" value="%{horizon}"/>
    <process expanded="true">
    <!-- H2O Deep Learning learner with two hidden layers of 50 units each. -->
    <operator activated="true" class="h2o:deep_learning" compatibility="7.6.001" expanded="true" height="82" name="Deep Learning" width="90" x="179" y="34">
    <enumeration key="hidden_layer_sizes">
    <parameter key="hidden_layer_sizes" value="50"/>
    <parameter key="hidden_layer_sizes" value="50"/>
    </enumeration>
    <enumeration key="hidden_dropout_ratios"/>
    <list key="expert_parameters"/>
    <list key="expert_parameters_"/>
    </operator>
    <connect from_port="training" to_op="Deep Learning" to_port="training set"/>
    <connect from_op="Deep Learning" from_port="model" to_port="model"/>
    <portSpacing port="source_training" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    </process>
    <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (2)" width="90" x="112" y="34">
    <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="performance_regression" compatibility="8.1.003" expanded="true" height="82" name="Performance" width="90" x="246" y="34"/>
    <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
    <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
    <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_averagable 1" spacing="0"/>
    <portSpacing port="sink_averagable 2" spacing="0"/>
    </process>
    </operator>
    <!--
      Second windowing, fed from the "original" port of Windowing for Training and without
      label creation. The three operators after it keep only the last windowed example
      (first_example = last_example = exampleCount) and store it under the name "data" as
      the starting point for the forecasting loop.
    -->
    <operator activated="true" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Application" width="90" x="1318" y="187">
    <parameter key="window_size" value="%{WindowSize}"/>
    <parameter key="label_attribute" value="inputYt"/>
    </operator>
    <operator activated="true" class="extract_macro" compatibility="8.1.003" expanded="true" height="68" name="Extract Example Count" width="90" x="1519" y="187">
    <parameter key="macro" value="exampleCount"/>
    <list key="additional_macros"/>
    </operator>
    <operator activated="true" class="filter_example_range" compatibility="8.1.003" expanded="true" height="82" name="Filter Example Range" width="90" x="1653" y="187">
    <parameter key="first_example" value="%{exampleCount}"/>
    <parameter key="last_example" value="%{exampleCount}"/>
    </operator>
    <operator activated="true" class="remember" compatibility="8.1.003" expanded="true" height="68" name="Remember" width="90" x="1787" y="187">
    <parameter key="name" value="data"/>
    </operator>
    <!--
      Forecasting loop, run futureDay times. Each iteration recalls "data", applies the
      validated model and sends one copy of the scored example to the loop output. The other
      copy is prepared as the next input: the date is advanced by 10 minutes, the
      Appliances-19 column is dropped, every remaining Appliances-k is renamed to
      Appliances-(k+1), prediction(label) becomes Appliances-0, and the result is stored
      again as "data".
      The hard-coded Appliances-19 and the rename list correspond to WindowSize=20 and must
      be edited by hand whenever WindowSize changes.
    -->
    <operator activated="true" class="loop" compatibility="8.1.003" expanded="true" height="82" name="Loop" width="90" x="1452" y="34">
    <parameter key="iterations" value="%{futureDay}"/>
    <process expanded="true">
    <operator activated="true" class="recall" compatibility="8.1.003" expanded="true" height="68" name="Recall" width="90" x="45" y="136">
    <parameter key="name" value="data"/>
    </operator>
    <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model" width="90" x="179" y="30">
    <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="multiply" compatibility="8.1.003" expanded="true" height="103" name="Multiply" width="90" x="447" y="30"/>
    <operator activated="true" class="materialize_data" compatibility="8.1.003" expanded="true" height="82" name="Materialize Data (2)" width="90" x="179" y="165"/>
    <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="82" name="Increase Date (2)" width="90" x="313" y="165">
    <list key="function_descriptions">
    <parameter key="date" value="date_add(date, 10, DATE_UNIT_MINUTE)"/>
    </list>
    </operator>
    <!-- No target_role is given, so prediction(label) receives the operator's default role; presumably "regular", confirm. -->
    <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role (2)" width="90" x="447" y="165">
    <parameter key="attribute_name" value="prediction(label)"/>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes (3)" width="90" x="179" y="289">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Appliances-19"/>
    <parameter key="invert_selection" value="true"/>
    </operator>
    <operator activated="true" class="rename" compatibility="8.1.003" expanded="true" height="82" name="Rename" width="90" x="313" y="289">
    <parameter key="old_name" value="Appliances-18"/>
    <parameter key="new_name" value="Appliances-19"/>
    <list key="rename_additional_attributes">
    <parameter key="Appliances-17" value="Appliances-18"/>
    <parameter key="Appliances-16" value="Appliances-17"/>
    <parameter key="Appliances-15" value="Appliances-16"/>
    <parameter key="Appliances-14" value="Appliances-15"/>
    <parameter key="Appliances-13" value="Appliances-14"/>
    <parameter key="Appliances-12" value="Appliances-13"/>
    <parameter key="Appliances-11" value="Appliances-12"/>
    <parameter key="Appliances-10" value="Appliances-11"/>
    <parameter key="Appliances-9" value="Appliances-10"/>
    <parameter key="Appliances-8" value="Appliances-9"/>
    <parameter key="Appliances-7" value="Appliances-8"/>
    <parameter key="Appliances-6" value="Appliances-7"/>
    <parameter key="Appliances-5" value="Appliances-6"/>
    <parameter key="Appliances-4" value="Appliances-5"/>
    <parameter key="Appliances-3" value="Appliances-4"/>
    <parameter key="Appliances-2" value="Appliances-3"/>
    <parameter key="Appliances-1" value="Appliances-2"/>
    <parameter key="Appliances-0" value="Appliances-1"/>
    <parameter key="prediction(label)" value="Appliances-0"/>
    </list>
    </operator>
    <operator activated="true" class="remember" compatibility="8.1.003" expanded="true" height="68" name="Remember (2)" width="90" x="447" y="289">
    <parameter key="name" value="data"/>
    </operator>
    <connect from_port="input 1" to_op="Apply Model" to_port="model"/>
    <connect from_op="Recall" from_port="result" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
    <connect from_op="Multiply" from_port="output 1" to_port="output 1"/>
    <connect from_op="Multiply" from_port="output 2" to_op="Materialize Data (2)" to_port="example set input"/>
    <connect from_op="Materialize Data (2)" from_port="example set output" to_op="Increase Date (2)" to_port="example set input"/>
    <connect from_op="Increase Date (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
    <connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
    <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
    <connect from_op="Rename" from_port="example set output" to_op="Remember (2)" to_port="store"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
    </process>
    </operator>
    <!-- Combines the per-iteration loop outputs into one example set (delivered on result 1). -->
    <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append" width="90" x="1586" y="34"/>
    <connect from_op="Read CSV" from_port="output" to_op="Nominal to Date" to_port="example set input"/>
    <connect from_op="Nominal to Date" from_port="example set output" to_op="Set Predictions_Params" to_port="in 1"/>
    <connect from_op="Set Predictions_Params" from_port="out 1" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Detect Outlier (LOF)" to_port="example set input"/>
    <connect from_op="Detect Outlier (LOF)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_op="Windowing for Training" to_port="example set input"/>
    <connect from_op="Windowing for Training" from_port="example set output" to_op="Validation" to_port="training"/>
    <connect from_op="Windowing for Training" from_port="original" to_op="Windowing for Application" to_port="example set input"/>
    <connect from_op="Validation" from_port="model" to_op="Loop" to_port="input 1"/>
    <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
    <connect from_op="Windowing for Application" from_port="example set output" to_op="Extract Example Count" to_port="example set"/>
    <connect from_op="Extract Example Count" from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
    <connect from_op="Filter Example Range" from_port="example set output" to_op="Remember" to_port="store"/>
    <connect from_op="Loop" from_port="output 1" to_op="Append" to_port="example set 1"/>
    <connect from_op="Append" from_port="merged set" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    </process>
    </operator>
    </process>

     

    I hope it helps,

     

    Regards,

     

    Lionel

     

     

    sgenzer
  • pix123pix123 Member Posts: 27 Contributor I

    Hi @lionelderkrikor apologies for the delay. Thank you for this, it has been helpful.

     

    My machine currently has performance issues due to the memory needed to run a large process.

     

    Is there an efficient way to run a prediction model without the need to window?

     

    Thanks again.

Sign In or Register to comment.