RapidMiner

Predicting values based on historical data

Wisdom logo Registration now open for RapidMiner Wisdom Americas | New Orleans | October 10-12, 2018   Learn More
Contributor II maurits_freriks
Contributor II

Predicting values based on historical data

Hi,

 

I'm doing an assignment about predicting flows based on historical data: using data from the previous days, I would like to predict tomorrow's flow. Below you can find my XML code.

 

 

I built process #1 to optimize the parameters of the validation operator and the SVM algorithm. The log output produced this table.

Screen Shot 2018-01-10 at 14.17.12 copy.png

I used these parameters for process #2 because, in my opinion, they are the best ones to use, but I obtained really strange results and graphs. Below is a picture of the predicted flow (blue) and the original flow of these days (red).

Screen Shot 2018-01-11 at 20.14.36.png

 

I'm probably doing something wrong, but I don't know what exactly. Of course the flows do have some strange spikes and outliers, but the direction of the predicted flow is complete nonsense. Is there someone who could help me out? I could share the datasets if someone is interested.

 

If there are still questions, feel free to send me a message. I'm really stuck at the moment and I have to hand in this assignment at the end of the month.

 

With kind regards,

 

Maurits Freriks 

 

p.s. I'm still a student so this is for education.

 

#1

<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<!-- Process #1: grid optimisation of the SVM and sliding window validation parameters.
     Top-level chain: Retrieve, Select Attributes (A), HBOS outlier score, Generate Attributes,
     Optimize Parameters (Grid). The grid's performance goes to result 2 and its parameter set to result 1. -->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve test data only flow oktober days train set" width="90" x="45" y="34">
<parameter key="repository_entry" value="../data/test data only flow oktober days train set"/>
</operator>
<!-- Keeps only the single attribute "A". -->
<operator activated="true" class="select_attributes" compatibility="7.6.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="A"/>
</operator>
<!-- Outlier scoring from the anomalydetection extension, configured for attribute A.
     NOTE(review): presumably this adds the "score" attribute that Generate Attributes reads below; confirm. -->
<operator activated="true" class="anomalydetection:Histogram-based Outlier Score (HBOS)" compatibility="2.4.001" expanded="true" height="82" name="Histogram-based Outlier Score (HBOS)" width="90" x="313" y="34">
<list key="histogram properties">
<parameter key="A" value="fixed binwidth.-1"/>
</list>
</operator>
<!-- Adds the attribute "Maintainence" (spelling as used in the process): 1 when score > 0.7, otherwise 0. -->
<operator activated="true" class="generate_attributes" compatibility="7.6.001" expanded="true" height="82" name="Generate Attributes" width="90" x="447" y="34">
<list key="function_descriptions">
<parameter key="Maintainence" value="if(score&gt;0.7,1,0)"/>
</list>
</operator>
<!-- Grid over four parameters: Validation.cumulative_training, SVM.kernel_gamma, SVM.C and
     Validation.training_window_width. -->
<operator activated="true" class="optimize_parameters_grid" compatibility="7.6.001" expanded="true" height="145" name="Optimize Parameters (Grid)" width="90" x="581" y="34">
<list key="parameters">
<parameter key="Validation.cumulative_training" value="true,false"/>
<parameter key="SVM.kernel_gamma" value="[0.1;0.8;5;logarithmic]"/>
<parameter key="SVM.C" value="[6000;10000;4;linear]"/>
<parameter key="Validation.training_window_width" value="[190;220;10;linear]"/>
</list>
<process expanded="true">
<!-- Defines the macro "horizon" with value 2. -->
<operator activated="true" class="set_macro" compatibility="7.6.001" expanded="true" height="82" name="Set Macro" width="90" x="45" y="34">
<parameter key="macro" value="horizon"/>
<parameter key="value" value="2"/>
</operator>
<!-- NOTE(review): window_size is taken from the macro "horizon" (2) and no horizon parameter is set on this
     operator, while the Performance operator below uses horizon=%{horizon}. Confirm that using the horizon
     macro as the window size is intended. -->
<operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Train" width="90" x="179" y="34">
<parameter key="window_size" value="%{horizon}"/>
<parameter key="create_label" value="true"/>
<parameter key="label_attribute" value="A"/>
</operator>
<!-- Sliding window validation. The first sub-process trains the SVM; the second applies the model to the
     test window and computes the forecasting performance. -->
<operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="380" y="34">
<parameter key="training_window_width" value="220"/>
<parameter key="training_window_step_size" value="5"/>
<parameter key="test_window_width" value="4"/>
<process expanded="true">
<!-- SVM with radial kernel; kernel_gamma and C are also listed in the grid above. -->
<operator activated="true" class="support_vector_machine" compatibility="7.6.001" expanded="true" height="124" name="SVM" width="90" x="112" y="34">
<parameter key="kernel_type" value="radial"/>
<parameter key="kernel_gamma" value="0.8"/>
<parameter key="C" value="10000.0"/>
</operator>
<connect from_port="training" to_op="SVM" to_port="training set"/>
<connect from_op="SVM" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
<list key="application_parameters"/>
</operator>
<operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="246" y="34">
<parameter key="horizon" value="%{horizon}"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<!-- Logs C, gamma, the three window widths of Validation, the validation performance and the Set Macro
     value; the filename parameter is "tmp". The performance passes through this operator to the grid. -->
<operator activated="true" class="log" compatibility="7.6.001" expanded="true" height="82" name="Log" width="90" x="581" y="85">
<parameter key="filename" value="tmp"/>
<list key="log">
<parameter key="C" value="operator.SVM.parameter.C"/>
<parameter key="Gamma" value="operator.SVM.parameter.kernel_gamma"/>
<parameter key="Training Width" value="operator.Validation.parameter.training_window_width"/>
<parameter key="Step Width" value="operator.Validation.parameter.training_window_step_size"/>
<parameter key="Testing Width" value="operator.Validation.parameter.test_window_width"/>
<parameter key="Perf" value="operator.Validation.value.performance"/>
<parameter key="Set Macro Value" value="operator.Set Macro.value.macro_value"/>
</list>
</operator>
<!-- Windows the "original" output of Windowing Train with the same window_size; create_label is not set
     here. Its output is delivered on result 2 of the inner process. -->
<operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Test" width="90" x="380" y="187">
<parameter key="window_size" value="%{horizon}"/>
</operator>
<connect from_port="input 1" to_op="Set Macro" to_port="through 1"/>
<connect from_op="Set Macro" from_port="through 1" to_op="Windowing Train" to_port="example set input"/>
<connect from_op="Windowing Train" from_port="example set output" to_op="Validation" to_port="training"/>
<connect from_op="Windowing Train" from_port="original" to_op="Windowing Test" to_port="example set input"/>
<connect from_op="Validation" from_port="model" to_port="result 1"/>
<connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
<connect from_op="Log" from_port="through 1" to_port="performance"/>
<connect from_op="Windowing Test" from_port="example set output" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_performance" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve test data only flow oktober days train set" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Histogram-based Outlier Score (HBOS)" to_port="example set"/>
<connect from_op="Histogram-based Outlier Score (HBOS)" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
<connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 2"/>
<connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>

 

#2

<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<!-- Process #2: trains a radial-kernel SVM with fixed parameters inside a sliding window validation on the
     windowed "VRIES train set" and applies the model delivered by Validation to the windowed "VRIES test set".
     Results: 1 = training data passed through Validation, 2 = performance, 3 = scored test data, 4 = model. -->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve VRIES test set" width="90" x="45" y="238">
<parameter key="repository_entry" value="../data/VRIES test set"/>
</operator>
<!-- Test branch: same windowing settings as the train branch below (window_size 5, label A, horizon 2). -->
<operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing (2)" width="90" x="179" y="238">
<parameter key="window_size" value="5"/>
<parameter key="create_label" value="true"/>
<parameter key="label_attribute" value="A"/>
<parameter key="horizon" value="2"/>
</operator>
<operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve VRIES train set" width="90" x="45" y="34">
<parameter key="repository_entry" value="../data/VRIES train set"/>
</operator>
<!-- Train branch: window_size 5, label created from attribute A, horizon 2. -->
<operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing" width="90" x="179" y="34">
<parameter key="window_size" value="5"/>
<parameter key="create_label" value="true"/>
<parameter key="label_attribute" value="A"/>
<parameter key="horizon" value="2"/>
</operator>
<!-- Sliding window validation with fixed widths (training 214, step 5, test 4) and horizon 2.
     NOTE(review): the "model" output port feeds Apply Model (2); confirm in the Series extension
     documentation which training data that delivered model is built on. -->
<operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="313" y="34">
<parameter key="training_window_width" value="214"/>
<parameter key="training_window_step_size" value="5"/>
<parameter key="test_window_width" value="4"/>
<parameter key="horizon" value="2"/>
<process expanded="true">
<!-- SVM with radial kernel, kernel_gamma 0.152 and C 7000. -->
<operator activated="true" class="support_vector_machine" compatibility="7.6.001" expanded="true" height="124" name="SVM" width="90" x="112" y="34">
<parameter key="kernel_type" value="radial"/>
<parameter key="kernel_gamma" value="0.152"/>
<parameter key="C" value="7000.0"/>
</operator>
<connect from_port="training" to_op="SVM" to_port="training set"/>
<connect from_op="SVM" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
<list key="application_parameters"/>
</operator>
<operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
<parameter key="horizon" value="2"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<!-- Scores the windowed test set with the model coming from Validation. -->
<operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="313" y="238">
<list key="application_parameters"/>
</operator>
<connect from_op="Retrieve VRIES test set" from_port="output" to_op="Windowing (2)" to_port="example set input"/>
<connect from_op="Windowing (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Retrieve VRIES train set" from_port="output" to_op="Windowing" to_port="example set input"/>
<connect from_op="Windowing" from_port="example set output" to_op="Validation" to_port="training"/>
<connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
<connect from_op="Validation" from_port="training" to_port="result 1"/>
<connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 3"/>
<connect from_op="Apply Model (2)" from_port="model" to_port="result 4"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
<portSpacing port="sink_result 5" spacing="0"/>
</process>
</operator>
</process>

8 REPLIES
Moderator Moderator
Moderator

Re: Predicting values based on historical data

Hi @maurits_freriks,

 

Can you share your dataset(s), please ?

 

Regards,

 

Lionel

RM Certified Expert RM Certified Expert
RM Certified Expert

Re: Predicting values based on historical data

I was helping @maurits_freriks offline for a bit but I'm crunched with work. My suggestion is to filter out the downward spikes, because my conversations with him indicated that these were times when the system was in maintenance mode.

Regards,
Thomas

Blog: Neural Market Trends

RapidMiner Tutorial Videos here!
Contributor II maurits_freriks
Contributor II

Re: Predicting values based on historical data

Hereby the datasets:

 

https://drive.google.com/open?id=12XjPKw2diSLnc9-MtAv_--SVfntA3nR-

 

Regards,

 

Maurits Freriks 

Contributor II maurits_freriks
Contributor II

Re: Predicting values based on historical data

A reaction to @Thomas_Ott: he helped me very well, and I really appreciate his effort! But I don't yet have the results I would like to have.

 

That's correct, those downward spikes are sometimes related to maintenance, but they could also be something like a glitch. So sometimes you know beforehand that the flow will be lower, but sometimes it is a surprise.

Moderator Moderator
Moderator

Re: Predicting values based on historical data

Hi @maurits_freriks,

 

I obtain this with a Deep Learning model without optimization (Window size = 20 / Horizon = 1) :

predictions_maintenance.png

 

The spikes are difficult to predict in my opinion.

To test and maybe improve the model, you can try to increase the window size in the following process :

<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
  <!-- Trains a Deep Learning model on a windowed series read from Train.xlsx, then forecasts step by step
       inside a Loop (futureDays iterations), appends the loop outputs and joins them with Test.xlsx on "time".
       NOTE(review): the context macros futureMonths and windowSize declared below are not referenced by any
       operator in this process; the operators use WindowSize, horizon and futureDays, which are set in the
       "Set Predictions_Params" subprocess. -->
  <context>
    <input/>
    <output/>
    <macros>
      <macro>
        <key>futureMonths</key>
        <value>15</value>
      </macro>
      <macro>
        <key>horizon</key>
        <value>1</value>
      </macro>
      <macro>
        <key>windowSize</key>
        <value>6</value>
      </macro>
    </macros>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Training data: cells A1:F274 of a local Excel file (absolute path on the author's machine). -->
      <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel" width="90" x="45" y="85">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Train.xlsx"/>
        <parameter key="imported_cell_range" value="A1:F274"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="time.true.date_time.attribute"/>
          <parameter key="1" value="data.true.real.attribute"/>
          <parameter key="2" value="C.true.attribute_value.attribute"/>
          <parameter key="3" value="D.true.attribute_value.attribute"/>
          <parameter key="4" value="E.true.attribute_value.attribute"/>
          <parameter key="5" value="F.true.attribute_value.attribute"/>
        </list>
      </operator>
      <!-- Sets the macros WindowSize = 20, horizon = 1 and futureDays = 40; the data is passed through unchanged. -->
      <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="Set Predictions_Params" width="90" x="179" y="85">
        <process expanded="true">
          <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Window_Size" width="90" x="45" y="34">
            <parameter key="macro" value="WindowSize"/>
            <parameter key="value" value="20"/>
          </operator>
          <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Horizon" width="90" x="179" y="34">
            <parameter key="macro" value="horizon"/>
            <parameter key="value" value="1"/>
          </operator>
          <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Future_Days" width="90" x="313" y="34">
            <parameter key="macro" value="futureDays"/>
            <parameter key="value" value="40"/>
          </operator>
          <connect from_port="in 1" to_op="Set Window_Size" to_port="through 1"/>
          <connect from_op="Set Window_Size" from_port="through 1" to_op="Set Horizon" to_port="through 1"/>
          <connect from_op="Set Horizon" from_port="through 1" to_op="Set Future_Days" to_port="through 1"/>
          <connect from_op="Set Future_Days" from_port="through 1" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Gives "time" the id role. -->
      <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
        <parameter key="attribute_name" value="time"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Selects the attribute subset "data". NOTE(review): presumably the id attribute "time" is kept as a
           special attribute, since it is used again inside the Loop and in the Join; confirm. -->
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="85">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="data"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples" width="90" x="648" y="85">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <!-- Builds the training windows from "data" using the WindowSize and horizon macros.
           Note: this operator has a breakpoint set ("after"). -->
      <operator activated="true" breakpoints="after" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Training" width="90" x="782" y="85">
        <parameter key="window_size" value="%{WindowSize}"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="data"/>
        <parameter key="horizon" value="%{horizon}"/>
      </operator>
      <!-- H2O Deep Learning with two hidden layers of 50 units; all other parameters are left unset here. -->
      <operator activated="true" class="h2o:deep_learning" compatibility="7.6.001" expanded="true" height="82" name="Deep Learning" width="90" x="916" y="34">
        <enumeration key="hidden_layer_sizes">
          <parameter key="hidden_layer_sizes" value="50"/>
          <parameter key="hidden_layer_sizes" value="50"/>
        </enumeration>
        <enumeration key="hidden_dropout_ratios"/>
        <list key="expert_parameters"/>
        <list key="expert_parameters_"/>
      </operator>
      <!-- Windows the "original" output of Windowing for Training again; create_label is not set here.
           NOTE(review): label_attribute "inputYt" presumably has no effect without create_label; confirm. -->
      <operator activated="true" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Application" width="90" x="916" y="136">
        <parameter key="window_size" value="%{WindowSize}"/>
        <parameter key="label_attribute" value="inputYt"/>
      </operator>
      <!-- The next three operators keep only example number exampleCount and store it under the name "data"
           as the starting window for the Loop. NOTE(review): exampleCount is presumably the number of
           examples (macro_type is not set), which would make this the last window; confirm. -->
      <operator activated="true" class="extract_macro" compatibility="8.0.001" expanded="true" height="68" name="Extract Example Count" width="90" x="1117" y="136">
        <parameter key="macro" value="exampleCount"/>
        <list key="additional_macros"/>
      </operator>
      <operator activated="true" class="filter_example_range" compatibility="8.0.001" expanded="true" height="82" name="Filter Example Range" width="90" x="1251" y="136">
        <parameter key="first_example" value="%{exampleCount}"/>
        <parameter key="last_example" value="%{exampleCount}"/>
      </operator>
      <operator activated="true" class="remember" compatibility="8.0.001" expanded="true" height="68" name="Remember" width="90" x="1385" y="136">
        <parameter key="name" value="data"/>
      </operator>
      <!-- Forecast loop, futureDays iterations. Each iteration recalls "data", applies the model, sends one
           copy of the scored example to output 1, then advances the time by one day, shifts the window
           attributes by one position and stores the result back under "data". -->
      <operator activated="true" class="loop" compatibility="8.0.001" expanded="true" height="82" name="Loop" width="90" x="1117" y="34">
        <parameter key="iterations" value="%{futureDays}"/>
        <process expanded="true">
          <operator activated="true" class="recall" compatibility="8.0.001" expanded="true" height="68" name="Recall" width="90" x="45" y="136">
            <parameter key="name" value="data"/>
          </operator>
          <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model" width="90" x="179" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="447" y="30"/>
          <operator activated="true" class="materialize_data" compatibility="8.0.001" expanded="true" height="82" name="Materialize Data (2)" width="90" x="179" y="165"/>
          <!-- Moves the "time" value one day forward. -->
          <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="82" name="Increase Date (2)" width="90" x="313" y="187">
            <list key="function_descriptions">
              <parameter key="time" value="date_add(time, 1, DATE_UNIT_DAY)"/>
            </list>
          </operator>
          <!-- NOTE(review): no target_role is given for prediction(label); presumably the default role turns
               it into a regular attribute so it can be renamed to data-0 below; confirm. -->
          <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role (2)" width="90" x="447" y="187">
            <parameter key="attribute_name" value="prediction(label)"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Removes data-19 (single attribute, inverted selection). -->
          <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="179" y="289">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="data-19"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Renames data-18 down to data-0 to the next higher index and prediction(label) to data-0.
               NOTE(review): the names here and in Select Attributes (3) are hard-coded for the 20 window
               attributes data-0 to data-19, matching WindowSize = 20; a different WindowSize would presumably
               require editing both operators. -->
          <operator activated="true" class="rename" compatibility="8.0.001" expanded="true" height="82" name="Rename" width="90" x="313" y="289">
            <parameter key="old_name" value="data-18"/>
            <parameter key="new_name" value="data-19"/>
            <list key="rename_additional_attributes">
              <parameter key="data-17" value="data-18"/>
              <parameter key="data-16" value="data-17"/>
              <parameter key="data-15" value="data-16"/>
              <parameter key="data-14" value="data-15"/>
              <parameter key="data-13" value="data-14"/>
              <parameter key="data-12" value="data-13"/>
              <parameter key="data-11" value="data-12"/>
              <parameter key="data-10" value="data-11"/>
              <parameter key="data-9" value="data-10"/>
              <parameter key="data-8" value="data-9"/>
              <parameter key="data-7" value="data-8"/>
              <parameter key="data-6" value="data-7"/>
              <parameter key="data-5" value="data-6"/>
              <parameter key="data-4" value="data-5"/>
              <parameter key="data-3" value="data-4"/>
              <parameter key="data-2" value="data-3"/>
              <parameter key="data-1" value="data-2"/>
              <parameter key="data-0" value="data-1"/>
              <parameter key="prediction(label)" value="data-0"/>
            </list>
          </operator>
          <operator activated="true" class="remember" compatibility="8.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="447" y="289">
            <parameter key="name" value="data"/>
          </operator>
          <connect from_port="input 1" to_op="Apply Model" to_port="model"/>
          <connect from_op="Recall" from_port="result" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_port="output 1"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Materialize Data (2)" to_port="example set input"/>
          <connect from_op="Materialize Data (2)" from_port="example set output" to_op="Increase Date (2)" to_port="example set input"/>
          <connect from_op="Increase Date (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
          <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Remember (2)" to_port="store"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Receives output 1 of the Loop and delivers a single merged example set. -->
      <operator activated="true" class="append" compatibility="8.0.001" expanded="true" height="82" name="Append" width="90" x="1251" y="34"/>
      <!-- Test data: cells A1:B32 of a local Excel file (absolute path on the author's machine). -->
      <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel (2)" width="90" x="916" y="238">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Test.xlsx"/>
        <parameter key="imported_cell_range" value="A1:B32"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="time.true.date_time.attribute"/>
          <parameter key="1" value="data.true.numeric.attribute"/>
        </list>
      </operator>
      <!-- Joins the appended forecasts (left) with the test data (right) using "time" as the key attribute. -->
      <operator activated="true" class="join" compatibility="8.0.001" expanded="true" height="82" name="Join" width="90" x="1385" y="34">
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="time" value="time"/>
        </list>
      </operator>
      <connect from_op="Read Excel" from_port="output" to_op="Set Predictions_Params" to_port="in 1"/>
      <connect from_op="Set Predictions_Params" from_port="out 1" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Windowing for Training" to_port="example set input"/>
      <connect from_op="Windowing for Training" from_port="example set output" to_op="Deep Learning" to_port="training set"/>
      <connect from_op="Windowing for Training" from_port="original" to_op="Windowing for Application" to_port="example set input"/>
      <connect from_op="Deep Learning" from_port="model" to_op="Loop" to_port="input 1"/>
      <connect from_op="Windowing for Application" from_port="example set output" to_op="Extract Example Count" to_port="example set"/>
      <connect from_op="Extract Example Count" from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
      <connect from_op="Filter Example Range" from_port="example set output" to_op="Remember" to_port="store"/>
      <connect from_op="Loop" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Join" to_port="left"/>
      <connect from_op="Read Excel (2)" from_port="output" to_op="Join" to_port="right"/>
      <connect from_op="Join" from_port="join" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

I hope it will help you in your project.

 

Regards,

 

Lionel

 

 

 

 

Contributor II maurits_freriks
Contributor II

Re: Predicting values based on historical data

Hi @lionelderkrikor

 

Thanks for helping me out! 

Correct me if I'm wrong: the graph you showed is not really accurate, right? Or do you think this is the best possible way to predict? Should a NN give a better approach? If I try to run this with an optimization operator it takes me days because my device is too slow; how about yours?

 

Regards,

 

Maurits Freriks 

Moderator Moderator
Moderator

Re: Predicting values based on historical data

Hi @maurits_freriks,

 

You're right, the graph I showed is of course not the best possible way to predict, only a lead to follow; however,

"who on Earth can boast of finding the best possible way to predict ......?"....

More seriously, I have the same problem as you: the optimization process takes too long. I optimize only one parameter at a time.

With Neural Networks, I don't get good results : 

 - the predictive curve is constant (horizontal line) or

 - the predictive curve increases over the second part of the test points (so the predictive curve moves away from the test curve).

For the moment, the best fit I have found is still with Deep Learning (n_epochs = 3.3) and Window size = 82 / Horizon = 1.

Here the curve(s) : 

predictions_maintenance_2.png

and here the associated process : 

<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
  <context>
    <input/>
    <output/>
    <macros>
      <macro>
        <key>futureMonths</key>
        <value>15</value>
      </macro>
      <macro>
        <key>horizon</key>
        <value>1</value>
      </macro>
      <macro>
        <key>windowSize</key>
        <value>6</value>
      </macro>
    </macros>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Loads the training series from a local workbook (cell range A1:F274).
           Column 0 is imported as "time" (date_time) and column 1 as "data" (real);
           columns C to F are imported with a generic value type.
           The excel_file path is an absolute path on the author's machine and has to be
           changed before the process can run elsewhere. -->
      <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel" width="90" x="45" y="85">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Train.xlsx"/>
        <parameter key="imported_cell_range" value="A1:F274"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="time.true.date_time.attribute"/>
          <parameter key="1" value="data.true.real.attribute"/>
          <parameter key="2" value="C.true.attribute_value.attribute"/>
          <parameter key="3" value="D.true.attribute_value.attribute"/>
          <parameter key="4" value="E.true.attribute_value.attribute"/>
          <parameter key="5" value="F.true.attribute_value.attribute"/>
        </list>
      </operator>
      <!-- Subprocess that only sets the forecasting macros and passes the data through
           unchanged (in 1 to out 1): WindowSize = 82, horizon = 1, futureDays = 40.
           NOTE(review): the Loop operator hard-codes attribute names up to data-81,
           which appears to assume WindowSize = 82; confirm before changing the value. -->
      <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="Set Predictions_Params" width="90" x="179" y="85">
        <process expanded="true">
          <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Window_Size" width="90" x="45" y="34">
            <parameter key="macro" value="WindowSize"/>
            <parameter key="value" value="82"/>
          </operator>
          <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Horizon" width="90" x="179" y="34">
            <parameter key="macro" value="horizon"/>
            <parameter key="value" value="1"/>
          </operator>
          <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Future_Days" width="90" x="313" y="34">
            <parameter key="macro" value="futureDays"/>
            <parameter key="value" value="40"/>
          </operator>
          <connect from_port="in 1" to_op="Set Window_Size" to_port="through 1"/>
          <connect from_op="Set Window_Size" from_port="through 1" to_op="Set Horizon" to_port="through 1"/>
          <connect from_op="Set Horizon" from_port="through 1" to_op="Set Future_Days" to_port="through 1"/>
          <connect from_op="Set Future_Days" from_port="through 1" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Gives the "time" attribute the id role. -->
      <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
        <parameter key="attribute_name" value="time"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Keeps only the "data" attribute out of the regular attributes.
           NOTE(review): the "time" id is presumably retained because special attributes
           are not included in the filter by default; confirm. -->
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="85">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="data"/>
      </operator>
      <!-- Filters the examples with the no_missing_attributes condition
           (presumably drops rows that contain missing values; confirm). -->
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples" width="90" x="648" y="85">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <!-- Turns the "data" series into windowed training examples: window length
           %{WindowSize}, with a label created from "data" at horizon %{horizon}.
           Its "original" output port feeds "Windowing for Application". -->
      <operator activated="true" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Training" width="90" x="782" y="85">
        <parameter key="window_size" value="%{WindowSize}"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="data"/>
        <parameter key="horizon" value="%{horizon}"/>
      </operator>
      <!-- Grid search over two Deep Learning parameters:
             epochs        range spec [0.1;10.1;100;linear]
             adaptive_rate true,false
           Each combination is evaluated by the inner sliding window validation
           (training window 75, test window 50). The training side fits an H2O Deep
           Learning model with hidden_layer_sizes 50 and 50; the testing side applies the
           model and computes a regression performance, which is returned to the optimizer.
           Outputs used by the outer process: performance, model and parameter set. -->
      <operator activated="true" class="concurrency:optimize_parameters_grid" compatibility="8.0.001" expanded="true" height="124" name="Optimize Parameters (Grid)" width="90" x="983" y="34">
        <list key="parameters">
          <parameter key="Deep Learning.epochs" value="[0.1;10.1;100;linear]"/>
          <parameter key="Deep Learning.adaptive_rate" value="true,false"/>
        </list>
        <process expanded="true">
          <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="380" y="34">
            <parameter key="training_window_width" value="75"/>
            <parameter key="test_window_width" value="50"/>
            <process expanded="true">
              <operator activated="true" class="h2o:deep_learning" compatibility="7.6.001" expanded="true" height="82" name="Deep Learning" width="90" x="179" y="34">
                <enumeration key="hidden_layer_sizes">
                  <parameter key="hidden_layer_sizes" value="50"/>
                  <parameter key="hidden_layer_sizes" value="50"/>
                </enumeration>
                <enumeration key="hidden_dropout_ratios"/>
                <list key="expert_parameters"/>
                <list key="expert_parameters_"/>
              </operator>
              <connect from_port="training" to_op="Deep Learning" to_port="training set"/>
              <connect from_op="Deep Learning" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="8.0.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="112" y="34">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_regression" compatibility="8.0.001" expanded="true" height="82" name="Performance" width="90" x="246" y="34"/>
              <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
              <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="input 1" to_op="Validation" to_port="training"/>
          <connect from_op="Validation" from_port="model" to_port="model"/>
          <connect from_op="Validation" from_port="averagable 1" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
        </process>
      </operator>
      <!-- Windows the same series a second time (window length %{WindowSize}) to build
           the input rows for forecasting; create_label is not set here.
           NOTE(review): label_attribute="inputYt" names an attribute that does not appear
           anywhere else in this process and looks like a leftover; presumably it is
           ignored while no label is created. Confirm before removing. -->
      <operator activated="true" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Application" width="90" x="916" y="136">
        <parameter key="window_size" value="%{WindowSize}"/>
        <parameter key="label_attribute" value="inputYt"/>
      </operator>
      <!-- Stores a value extracted from the windowed example set in the macro
           "exampleCount". No macro type is given, so the operator default applies
           (presumably the number of examples, as the operator name suggests; confirm). -->
      <operator activated="true" class="extract_macro" compatibility="8.0.001" expanded="true" height="68" name="Extract Example Count" width="90" x="1117" y="136">
        <parameter key="macro" value="exampleCount"/>
        <list key="additional_macros"/>
      </operator>
      <!-- Keeps the single example at index %{exampleCount} (first and last bound are
           the same macro), i.e. the last window if the macro holds the example count. -->
      <operator activated="true" class="filter_example_range" compatibility="8.0.001" expanded="true" height="82" name="Filter Example Range" width="90" x="1251" y="136">
        <parameter key="first_example" value="%{exampleCount}"/>
        <parameter key="last_example" value="%{exampleCount}"/>
      </operator>
      <!-- Stores the selected window under the name "data"; the Recall operator inside
           "Loop" reads it back as the starting point of the forecast. -->
      <operator activated="true" class="remember" compatibility="8.0.001" expanded="true" height="68" name="Remember" width="90" x="1385" y="136">
        <parameter key="name" value="data"/>
      </operator>
      <!-- Loads the hold-out series from a second local workbook (cell range A1:B32):
           column 0 as "time" (date_time) and column 1 as "data" (numeric). Its output is
           the right input of "Join".
           The excel_file path is an absolute path on the author's machine and has to be
           changed before the process can run elsewhere. -->
      <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel (2)" width="90" x="916" y="238">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Test.xlsx"/>
        <parameter key="imported_cell_range" value="A1:B32"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="time.true.date_time.attribute"/>
          <parameter key="1" value="data.true.numeric.attribute"/>
        </list>
      </operator>
      <!-- Recursive forecast, repeated %{futureDays} times. Each iteration:
             1. recalls the example set remembered as "data";
             2. applies the model that arrives on input 1;
             3. sends one copy of the scored row to output 1;
             4. on the second copy, moves "time" forward by one day, drops the oldest
                window value (data-81), shifts every data-N to data-(N+1) and renames
                prediction(label) to data-0;
             5. remembers the shifted row as "data" for the next iteration.
           NOTE(review): the attribute names data-0 to data-81 are hard-coded, which
           appears to assume a window size of 82; the Select Attributes (3) and Rename
           settings would have to be regenerated if WindowSize changes. -->
      <operator activated="true" class="loop" compatibility="8.0.001" expanded="true" height="82" name="Loop" width="90" x="1117" y="34">
        <parameter key="iterations" value="%{futureDays}"/>
        <process expanded="true">
          <operator activated="true" class="recall" compatibility="8.0.001" expanded="true" height="68" name="Recall" width="90" x="45" y="136">
            <parameter key="name" value="data"/>
          </operator>
          <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model" width="90" x="179" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="447" y="30"/>
          <operator activated="true" class="materialize_data" compatibility="8.0.001" expanded="true" height="82" name="Materialize Data (2)" width="90" x="179" y="165"/>
          <!-- Advance the timestamp so the next forecast row is dated one day later. -->
          <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="82" name="Increase Date (2)" width="90" x="313" y="187">
            <list key="function_descriptions">
              <parameter key="time" value="date_add(time, 1, DATE_UNIT_DAY)"/>
            </list>
          </operator>
          <!-- No target_role is given for prediction(label), so the operator default role
               applies (presumably "regular", which lets Rename treat it as an input; confirm). -->
          <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role (2)" width="90" x="447" y="187">
            <parameter key="attribute_name" value="prediction(label)"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Inverted single selection: removes data-81 and keeps everything else. -->
          <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="179" y="289">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="data-81"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Shifts the window by one position: data-80 becomes data-81, and so on down
               to data-0 becoming data-1; the fresh prediction becomes the new data-0. -->
          <operator activated="true" class="rename" compatibility="8.0.001" expanded="true" height="82" name="Rename" width="90" x="313" y="289">
            <parameter key="old_name" value="data-80"/>
            <parameter key="new_name" value="data-81"/>
            <list key="rename_additional_attributes">
              <parameter key="data-79" value="data-80"/>
              <parameter key="data-78" value="data-79"/>
              <parameter key="data-77" value="data-78"/>
              <parameter key="data-76" value="data-77"/>
              <parameter key="data-75" value="data-76"/>
              <parameter key="data-74" value="data-75"/>
              <parameter key="data-73" value="data-74"/>
              <parameter key="data-72" value="data-73"/>
              <parameter key="data-71" value="data-72"/>
              <parameter key="data-70" value="data-71"/>
              <parameter key="data-69" value="data-70"/>
              <parameter key="data-68" value="data-69"/>
              <parameter key="data-67" value="data-68"/>
              <parameter key="data-66" value="data-67"/>
              <parameter key="data-65" value="data-66"/>
              <parameter key="data-64" value="data-65"/>
              <parameter key="data-63" value="data-64"/>
              <parameter key="data-62" value="data-63"/>
              <parameter key="data-61" value="data-62"/>
              <parameter key="data-60" value="data-61"/>
              <parameter key="data-59" value="data-60"/>
              <parameter key="data-58" value="data-59"/>
              <parameter key="data-57" value="data-58"/>
              <parameter key="data-56" value="data-57"/>
              <parameter key="data-55" value="data-56"/>
              <parameter key="data-54" value="data-55"/>
              <parameter key="data-53" value="data-54"/>
              <parameter key="data-52" value="data-53"/>
              <parameter key="data-51" value="data-52"/>
              <parameter key="data-50" value="data-51"/>
              <parameter key="data-49" value="data-50"/>
              <parameter key="data-48" value="data-49"/>
              <parameter key="data-47" value="data-48"/>
              <parameter key="data-46" value="data-47"/>
              <parameter key="data-45" value="data-46"/>
              <parameter key="data-44" value="data-45"/>
              <parameter key="data-43" value="data-44"/>
              <parameter key="data-42" value="data-43"/>
              <parameter key="data-41" value="data-42"/>
              <parameter key="data-40" value="data-41"/>
              <parameter key="data-39" value="data-40"/>
              <parameter key="data-38" value="data-39"/>
              <parameter key="data-37" value="data-38"/>
              <parameter key="data-36" value="data-37"/>
              <parameter key="data-35" value="data-36"/>
              <parameter key="data-34" value="data-35"/>
              <parameter key="data-33" value="data-34"/>
              <parameter key="data-32" value="data-33"/>
              <parameter key="data-31" value="data-32"/>
              <parameter key="data-30" value="data-31"/>
              <parameter key="data-29" value="data-30"/>
              <parameter key="data-28" value="data-29"/>
              <parameter key="data-27" value="data-28"/>
              <parameter key="data-26" value="data-27"/>
              <parameter key="data-25" value="data-26"/>
              <parameter key="data-24" value="data-25"/>
              <parameter key="data-23" value="data-24"/>
              <parameter key="data-22" value="data-23"/>
              <parameter key="data-21" value="data-22"/>
              <parameter key="data-20" value="data-21"/>
              <parameter key="data-19" value="data-20"/>
              <parameter key="data-18" value="data-19"/>
              <parameter key="data-17" value="data-18"/>
              <parameter key="data-16" value="data-17"/>
              <parameter key="data-15" value="data-16"/>
              <parameter key="data-14" value="data-15"/>
              <parameter key="data-13" value="data-14"/>
              <parameter key="data-12" value="data-13"/>
              <parameter key="data-11" value="data-12"/>
              <parameter key="data-10" value="data-11"/>
              <parameter key="data-9" value="data-10"/>
              <parameter key="data-8" value="data-9"/>
              <parameter key="data-7" value="data-8"/>
              <parameter key="data-6" value="data-7"/>
              <parameter key="data-5" value="data-6"/>
              <parameter key="data-4" value="data-5"/>
              <parameter key="data-3" value="data-4"/>
              <parameter key="data-2" value="data-3"/>
              <parameter key="data-1" value="data-2"/>
              <parameter key="data-0" value="data-1"/>
              <parameter key="prediction(label)" value="data-0"/>
            </list>
          </operator>
          <operator activated="true" class="remember" compatibility="8.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="447" y="289">
            <parameter key="name" value="data"/>
          </operator>
          <connect from_port="input 1" to_op="Apply Model" to_port="model"/>
          <connect from_op="Recall" from_port="result" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_port="output 1"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Materialize Data (2)" to_port="example set input"/>
          <connect from_op="Materialize Data (2)" from_port="example set output" to_op="Increase Date (2)" to_port="example set input"/>
          <connect from_op="Increase Date (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
          <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Remember (2)" to_port="store"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Merges the scored rows delivered by "Loop" (output 1) into one example set. -->
      <operator activated="true" class="append" compatibility="8.0.001" expanded="true" height="82" name="Append" width="90" x="1251" y="34"/>
      <!-- Joins the appended forecasts (left) with the hold-out data from
           "Read Excel (2)" (right) on the "time" attribute instead of the id role, so
           predicted and actual values end up side by side. No join type is given, so the
           operator default applies (presumably an inner join; confirm). -->
      <operator activated="true" class="join" compatibility="8.0.001" expanded="true" height="82" name="Join" width="90" x="1385" y="34">
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="time" value="time"/>
        </list>
      </operator>
      <!-- Top-level wiring.
           Training path: Read Excel, Set Predictions_Params, Set Role, Select Attributes,
           Filter Examples, Windowing for Training, Optimize Parameters (Grid).
           Forecast seed: the "original" port of Windowing for Training feeds Windowing for
           Application, Extract Example Count, Filter Example Range and Remember.
           Forecast: the optimized model enters Loop; its output goes through Append into
           Join together with Read Excel (2).
           Results: 1 = joined forecast table, 2 = parameter set, 3 = performance. -->
      <connect from_op="Read Excel" from_port="output" to_op="Set Predictions_Params" to_port="in 1"/>
      <connect from_op="Set Predictions_Params" from_port="out 1" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Windowing for Training" to_port="example set input"/>
      <connect from_op="Windowing for Training" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
      <connect from_op="Windowing for Training" from_port="original" to_op="Windowing for Application" to_port="example set input"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 3"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="model" to_op="Loop" to_port="input 1"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="parameter set" to_port="result 2"/>
      <connect from_op="Windowing for Application" from_port="example set output" to_op="Extract Example Count" to_port="example set"/>
      <connect from_op="Extract Example Count" from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
      <connect from_op="Filter Example Range" from_port="example set output" to_op="Remember" to_port="store"/>
      <connect from_op="Read Excel (2)" from_port="output" to_op="Join" to_port="right"/>
      <connect from_op="Loop" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Join" to_port="left"/>
      <connect from_op="Join" from_port="join" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

I hope it will be helpful,

 

Regards,

 

Lionel

Highlighted
RM Certified Expert RM Certified Expert
RM Certified Expert

Re: Predicting values based on historical data

Try filtering out the spikes downward and run the model again. I think they're really messing with the analysis. 

Regards,
Thomas

Blog: Neural Market Trends

RapidMiner Tutorial Videos here!