Predicting values based on historical data

maurits_freriksmaurits_freriks Member Posts: 28 Contributor I
edited November 2018 in Help

Hi,

 

I'm doing an assignment about predicting flows based on historical data: using the data from the previous days, I would like to predict the flow of tomorrow. Below you can find my XML code.

 

 

I built process #1 to optimize the parameters of the validation operator and the SVM algorithm. The log output is shown in this table.

Screen Shot 2018-01-10 at 14.17.12 copy.png

I used these parameters for process #2 because, in my opinion, they are the best ones to use, but I obtained really strange results and graphs. Below is a picture of the predicted graph (blue) and the original flow of these days (red).

Screen Shot 2018-01-11 at 20.14.36.png

 

I'm probably doing something wrong, but I don't know what exactly. Of course the flow does have some strange spikes and outliers, but the predicted direction of the flow is complete nonsense. Is there someone who could help me out? I can share the datasets if anyone is interested.

 

If there are still questions, feel free to send me a message. I'm really stuck at the moment and I have to hand in this assignment at the end of the month.

 

With kind regards,

 

Maurits Freriks 

 

p.s. I'm still a student so this is for education.

 

#1

<?xml version="1.0" encoding="UTF-8"?>
<process version="7.6.001">
  <!--
    Process #1: grid search over the SVM settings (C, kernel_gamma) and the
    sliding-window validation settings (training_window_width,
    cumulative_training). Every tested combination is written to a log file.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Data preparation: load the training series and keep only attribute A. -->
      <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve test data only flow oktober days train set" width="90" x="45" y="34">
        <parameter key="repository_entry" value="../data/test data only flow oktober days train set"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="7.6.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="A"/>
      </operator>
      <!-- Outlier scoring on A; rows with a score above 0.7 are flagged as Maintainence = 1. -->
      <operator activated="true" class="anomalydetection:Histogram-based Outlier Score (HBOS)" compatibility="2.4.001" expanded="true" height="82" name="Histogram-based Outlier Score (HBOS)" width="90" x="313" y="34">
        <list key="histogram properties">
          <parameter key="A" value="fixed binwidth.-1"/>
        </list>
      </operator>
      <operator activated="true" class="generate_attributes" compatibility="7.6.001" expanded="true" height="82" name="Generate Attributes" width="90" x="447" y="34">
        <list key="function_descriptions">
          <parameter key="Maintainence" value="if(score&gt;0.7,1,0)"/>
        </list>
      </operator>
      <!--
        Parameter grid. The bracketed values presumably read as
        [min;max;steps;scale]; verify against the operator documentation.
      -->
      <operator activated="true" class="optimize_parameters_grid" compatibility="7.6.001" expanded="true" height="145" name="Optimize Parameters (Grid)" width="90" x="581" y="34">
        <list key="parameters">
          <parameter key="Validation.cumulative_training" value="true,false"/>
          <parameter key="SVM.kernel_gamma" value="[0.1;0.8;5;logarithmic]"/>
          <parameter key="SVM.C" value="[6000;10000;4;linear]"/>
          <parameter key="Validation.training_window_width" value="[190;220;10;linear]"/>
        </list>
        <process expanded="true">
          <!-- The macro "horizon" (2) is reused below as window_size and as the performance horizon. -->
          <operator activated="true" class="set_macro" compatibility="7.6.001" expanded="true" height="82" name="Set Macro" width="90" x="45" y="34">
            <parameter key="macro" value="horizon"/>
            <parameter key="value" value="2"/>
          </operator>
          <!--
            NOTE(review): window_size is fed from the macro named "horizon" and no
            explicit horizon parameter is set on this operator, so the operator
            default applies. Confirm that this is intended.
          -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Train" width="90" x="179" y="34">
            <parameter key="window_size" value="%{horizon}"/>
            <parameter key="create_label" value="true"/>
            <parameter key="label_attribute" value="A"/>
          </operator>
          <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="380" y="34">
            <parameter key="training_window_width" value="220"/>
            <parameter key="training_window_step_size" value="5"/>
            <parameter key="test_window_width" value="4"/>
            <!-- Training side: SVM with a radial kernel. -->
            <process expanded="true">
              <operator activated="true" class="support_vector_machine" compatibility="7.6.001" expanded="true" height="124" name="SVM" width="90" x="112" y="34">
                <parameter key="kernel_type" value="radial"/>
                <parameter key="kernel_gamma" value="0.8"/>
                <parameter key="C" value="10000.0"/>
              </operator>
              <connect from_port="training" to_op="SVM" to_port="training set"/>
              <connect from_op="SVM" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <!-- Testing side: apply the model and measure forecasting performance. -->
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="246" y="34">
                <parameter key="horizon" value="%{horizon}"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <!-- One log row per grid point: SVM settings, window widths, performance and macro value, written to file "tmp". -->
          <operator activated="true" class="log" compatibility="7.6.001" expanded="true" height="82" name="Log" width="90" x="581" y="85">
            <parameter key="filename" value="tmp"/>
            <list key="log">
              <parameter key="C" value="operator.SVM.parameter.C"/>
              <parameter key="Gamma" value="operator.SVM.parameter.kernel_gamma"/>
              <parameter key="Training Width" value="operator.Validation.parameter.training_window_width"/>
              <parameter key="Step Width" value="operator.Validation.parameter.training_window_step_size"/>
              <parameter key="Testing Width" value="operator.Validation.parameter.test_window_width"/>
              <parameter key="Perf" value="operator.Validation.value.performance"/>
              <parameter key="Set Macro Value" value="operator.Set Macro.value.macro_value"/>
            </list>
          </operator>
          <!-- Windows the original series from Windowing Train without creating a label; delivered as result 2. -->
          <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing Test" width="90" x="380" y="187">
            <parameter key="window_size" value="%{horizon}"/>
          </operator>
          <connect from_port="input 1" to_op="Set Macro" to_port="through 1"/>
          <connect from_op="Set Macro" from_port="through 1" to_op="Windowing Train" to_port="example set input"/>
          <connect from_op="Windowing Train" from_port="example set output" to_op="Validation" to_port="training"/>
          <connect from_op="Windowing Train" from_port="original" to_op="Windowing Test" to_port="example set input"/>
          <connect from_op="Validation" from_port="model" to_port="result 1"/>
          <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="performance"/>
          <connect from_op="Windowing Test" from_port="example set output" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: preparation chain into the optimizer; best parameter set and performance are the results. -->
      <connect from_op="Retrieve test data only flow oktober days train set" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Histogram-based Outlier Score (HBOS)" to_port="example set"/>
      <connect from_op="Histogram-based Outlier Score (HBOS)" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 2"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

 

#2

<?xml version="1.0" encoding="UTF-8"?>
<process version="7.6.001">
  <!--
    Process #2: windows the VRIES train set, validates an SVM (radial kernel)
    with a sliding window, and applies the model delivered by the validation
    to the windowed VRIES test set.
    NOTE(review): both Windowing operators use window_size 5 and horizon 2.
    Confirm that the hard-coded SVM and validation values below were tuned
    with these same windowing settings.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Test branch: load and window the hold-out series. -->
      <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve VRIES test set" width="90" x="45" y="238">
        <parameter key="repository_entry" value="../data/VRIES test set"/>
      </operator>
      <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing (2)" width="90" x="179" y="238">
        <parameter key="window_size" value="5"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="A"/>
        <parameter key="horizon" value="2"/>
      </operator>
      <!-- Training branch: load and window the training series with identical settings. -->
      <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="Retrieve VRIES train set" width="90" x="45" y="34">
        <parameter key="repository_entry" value="../data/VRIES train set"/>
      </operator>
      <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing" width="90" x="179" y="34">
        <parameter key="window_size" value="5"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="A"/>
        <parameter key="horizon" value="2"/>
      </operator>
      <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="313" y="34">
        <parameter key="training_window_width" value="214"/>
        <parameter key="training_window_step_size" value="5"/>
        <parameter key="test_window_width" value="4"/>
        <parameter key="horizon" value="2"/>
        <!-- Training side: SVM with fixed gamma and C. -->
        <process expanded="true">
          <operator activated="true" class="support_vector_machine" compatibility="7.6.001" expanded="true" height="124" name="SVM" width="90" x="112" y="34">
            <parameter key="kernel_type" value="radial"/>
            <parameter key="kernel_gamma" value="0.152"/>
            <parameter key="C" value="7000.0"/>
          </operator>
          <connect from_port="training" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing side: apply the model and measure forecasting performance at horizon 2. -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="series:forecasting_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
            <parameter key="horizon" value="2"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- Scores the windowed test set with the model coming out of Validation. -->
      <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="313" y="238">
        <list key="application_parameters"/>
      </operator>
      <!-- Top-level wiring. Results: training data, validation performance, scored test set, model. -->
      <connect from_op="Retrieve VRIES test set" from_port="output" to_op="Windowing (2)" to_port="example set input"/>
      <connect from_op="Windowing (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Retrieve VRIES train set" from_port="output" to_op="Windowing" to_port="example set input"/>
      <connect from_op="Windowing" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Validation" from_port="training" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 3"/>
      <connect from_op="Apply Model (2)" from_port="model" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>

Tagged:

Answers

  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn

    Hi @maurits_freriks,

     

    Can you share your dataset(s), please ?

     

    Regards,

     

    Lionel

  • Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn

    I was helping @maurits_freriks offline for a bit but I'm crunched with work. My suggestion is to filter out the downward spikes, because my conversations with him indicated that these were times when the system was in maintenance mode.

  • maurits_freriksmaurits_freriks Member Posts: 28 Contributor I

    Hereby the datasets:

     

    https://drive.google.com/open?id=12XjPKw2diSLnc9-MtAv_--SVfntA3nR-

     

    Regards,

     

    Maurits Freriks 

  • maurits_freriksmaurits_freriks Member Posts: 28 Contributor I

    A reaction to @Thomas_Ott: he helped me very well and I really appreciate his effort! But I don't yet have the results I would like to have.

     

    That's correct, those downward spikes are sometimes related to maintenance. But they could also be something like a glitch. So sometimes you know beforehand that the flow will be lower, but sometimes it is a surprise.

  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn

    Hi @maurits_freriks,

     

    I obtain this with a Deep Learning model without optimization (Windows size = 20 / Horizon = 1) : 

    predictions_maintenance.png

     

    The spikes are difficult to predict in my opinion.

    To test and maybe improve the model, you can try to increase the window size in the following process:

    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.0.001">
      <!--
        Recursive forecast with H2O Deep Learning: the "data" series from
        Train.xlsx is windowed and used for training; a Loop then applies the
        model futureDays times, feeding every prediction back in as the newest
        window value. The appended predictions are joined with Test.xlsx on
        "time".
      -->
      <context>
        <input/>
        <output/>
        <!--
          NOTE(review): no operator in this process references the context macros
          futureMonths or windowSize. The operators use WindowSize (capital W),
          horizon and futureDays, all assigned in "Set Predictions_Params".
        -->
        <macros>
          <macro>
            <key>futureMonths</key>
            <value>15</value>
          </macro>
          <macro>
            <key>horizon</key>
            <value>1</value>
          </macro>
          <macro>
            <key>windowSize</key>
            <value>6</value>
          </macro>
        </macros>
      </context>
      <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
        <process expanded="true">
          <!-- Absolute local path: adjust it to the location of your own Train.xlsx. -->
          <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel" width="90" x="45" y="85">
            <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Train.xlsx"/>
            <parameter key="imported_cell_range" value="A1:F274"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="time.true.date_time.attribute"/>
              <parameter key="1" value="data.true.real.attribute"/>
              <parameter key="2" value="C.true.attribute_value.attribute"/>
              <parameter key="3" value="D.true.attribute_value.attribute"/>
              <parameter key="4" value="E.true.attribute_value.attribute"/>
              <parameter key="5" value="F.true.attribute_value.attribute"/>
            </list>
          </operator>
          <!-- Central place for the tunables: WindowSize = 20, horizon = 1, futureDays = 40. -->
          <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="Set Predictions_Params" width="90" x="179" y="85">
            <process expanded="true">
              <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Window_Size" width="90" x="45" y="34">
                <parameter key="macro" value="WindowSize"/>
                <parameter key="value" value="20"/>
              </operator>
              <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Horizon" width="90" x="179" y="34">
                <parameter key="macro" value="horizon"/>
                <parameter key="value" value="1"/>
              </operator>
              <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Future_Days" width="90" x="313" y="34">
                <parameter key="macro" value="futureDays"/>
                <parameter key="value" value="40"/>
              </operator>
              <connect from_port="in 1" to_op="Set Window_Size" to_port="through 1"/>
              <connect from_op="Set Window_Size" from_port="through 1" to_op="Set Horizon" to_port="through 1"/>
              <connect from_op="Set Horizon" from_port="through 1" to_op="Set Future_Days" to_port="through 1"/>
              <connect from_op="Set Future_Days" from_port="through 1" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <!-- Use "time" as id, keep the "data" attribute, drop rows with missing values. -->
          <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
            <parameter key="attribute_name" value="time"/>
            <parameter key="target_role" value="id"/>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="85">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="data"/>
          </operator>
          <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples" width="90" x="648" y="85">
            <parameter key="condition_class" value="no_missing_attributes"/>
            <list key="filters_list"/>
          </operator>
          <!-- Labelled windows for training. A breakpoint is set after this operator, so execution pauses here. -->
          <operator activated="true" breakpoints="after" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Training" width="90" x="782" y="85">
            <parameter key="window_size" value="%{WindowSize}"/>
            <parameter key="create_label" value="true"/>
            <parameter key="label_attribute" value="data"/>
            <parameter key="horizon" value="%{horizon}"/>
          </operator>
          <!-- Two hidden layers of 50 units each; everything else is left at the operator defaults. -->
          <operator activated="true" class="h2o:deep_learning" compatibility="7.6.001" expanded="true" height="82" name="Deep Learning" width="90" x="916" y="34">
            <enumeration key="hidden_layer_sizes">
              <parameter key="hidden_layer_sizes" value="50"/>
              <parameter key="hidden_layer_sizes" value="50"/>
            </enumeration>
            <enumeration key="hidden_dropout_ratios"/>
            <list key="expert_parameters"/>
            <list key="expert_parameters_"/>
          </operator>
          <!-- Windows for scoring: same window size, create_label is not set here. -->
          <operator activated="true" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Application" width="90" x="916" y="136">
            <parameter key="window_size" value="%{WindowSize}"/>
            <parameter key="label_attribute" value="inputYt"/>
          </operator>
          <!-- Keep only the example at position exampleCount (presumably the last window) and store it as the seed "data". -->
          <operator activated="true" class="extract_macro" compatibility="8.0.001" expanded="true" height="68" name="Extract Example Count" width="90" x="1117" y="136">
            <parameter key="macro" value="exampleCount"/>
            <list key="additional_macros"/>
          </operator>
          <operator activated="true" class="filter_example_range" compatibility="8.0.001" expanded="true" height="82" name="Filter Example Range" width="90" x="1251" y="136">
            <parameter key="first_example" value="%{exampleCount}"/>
            <parameter key="last_example" value="%{exampleCount}"/>
          </operator>
          <operator activated="true" class="remember" compatibility="8.0.001" expanded="true" height="68" name="Remember" width="90" x="1385" y="136">
            <parameter key="name" value="data"/>
          </operator>
          <!-- One iteration per future day: score the remembered window, then build the next window from it. -->
          <operator activated="true" class="loop" compatibility="8.0.001" expanded="true" height="82" name="Loop" width="90" x="1117" y="34">
            <parameter key="iterations" value="%{futureDays}"/>
            <process expanded="true">
              <operator activated="true" class="recall" compatibility="8.0.001" expanded="true" height="68" name="Recall" width="90" x="45" y="136">
                <parameter key="name" value="data"/>
              </operator>
              <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model" width="90" x="179" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="447" y="30"/>
              <operator activated="true" class="materialize_data" compatibility="8.0.001" expanded="true" height="82" name="Materialize Data (2)" width="90" x="179" y="165"/>
              <!-- Advance the timestamp by one day for the next step. -->
              <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="82" name="Increase Date (2)" width="90" x="313" y="187">
                <list key="function_descriptions">
                  <parameter key="time" value="date_add(time, 1, DATE_UNIT_DAY)"/>
                </list>
              </operator>
              <!-- No target_role is given, so the operator default applies to prediction(label). -->
              <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role (2)" width="90" x="447" y="187">
                <parameter key="attribute_name" value="prediction(label)"/>
                <list key="set_additional_roles"/>
              </operator>
              <!--
                Window shift: remove data-19, rename data-k to data-(k+1), and turn
                the prediction into the new data-0.
                NOTE(review): these names are hard-coded for a window size of 20, so
                presumably this block has to be regenerated whenever WindowSize changes.
              -->
              <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="179" y="289">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="data-19"/>
                <parameter key="invert_selection" value="true"/>
              </operator>
              <operator activated="true" class="rename" compatibility="8.0.001" expanded="true" height="82" name="Rename" width="90" x="313" y="289">
                <parameter key="old_name" value="data-18"/>
                <parameter key="new_name" value="data-19"/>
                <list key="rename_additional_attributes">
                  <parameter key="data-17" value="data-18"/>
                  <parameter key="data-16" value="data-17"/>
                  <parameter key="data-15" value="data-16"/>
                  <parameter key="data-14" value="data-15"/>
                  <parameter key="data-13" value="data-14"/>
                  <parameter key="data-12" value="data-13"/>
                  <parameter key="data-11" value="data-12"/>
                  <parameter key="data-10" value="data-11"/>
                  <parameter key="data-9" value="data-10"/>
                  <parameter key="data-8" value="data-9"/>
                  <parameter key="data-7" value="data-8"/>
                  <parameter key="data-6" value="data-7"/>
                  <parameter key="data-5" value="data-6"/>
                  <parameter key="data-4" value="data-5"/>
                  <parameter key="data-3" value="data-4"/>
                  <parameter key="data-2" value="data-3"/>
                  <parameter key="data-1" value="data-2"/>
                  <parameter key="data-0" value="data-1"/>
                  <parameter key="prediction(label)" value="data-0"/>
                </list>
              </operator>
              <!-- Overwrite the stored window so the next iteration starts from the shifted one. -->
              <operator activated="true" class="remember" compatibility="8.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="447" y="289">
                <parameter key="name" value="data"/>
              </operator>
              <connect from_port="input 1" to_op="Apply Model" to_port="model"/>
              <connect from_op="Recall" from_port="result" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_port="output 1"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Materialize Data (2)" to_port="example set input"/>
              <connect from_op="Materialize Data (2)" from_port="example set output" to_op="Increase Date (2)" to_port="example set input"/>
              <connect from_op="Increase Date (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
              <connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
              <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
              <connect from_op="Rename" from_port="example set output" to_op="Remember (2)" to_port="store"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Stack the per-iteration predictions into one example set. -->
          <operator activated="true" class="append" compatibility="8.0.001" expanded="true" height="82" name="Append" width="90" x="1251" y="34"/>
          <!-- Absolute local path: adjust it to the location of your own Test.xlsx. -->
          <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel (2)" width="90" x="916" y="238">
            <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Test.xlsx"/>
            <parameter key="imported_cell_range" value="A1:B32"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="time.true.date_time.attribute"/>
              <parameter key="1" value="data.true.numeric.attribute"/>
            </list>
          </operator>
          <!-- Pair predictions with the actual test values through the "time" attribute. -->
          <operator activated="true" class="join" compatibility="8.0.001" expanded="true" height="82" name="Join" width="90" x="1385" y="34">
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="time" value="time"/>
            </list>
          </operator>
          <connect from_op="Read Excel" from_port="output" to_op="Set Predictions_Params" to_port="in 1"/>
          <connect from_op="Set Predictions_Params" from_port="out 1" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Windowing for Training" to_port="example set input"/>
          <connect from_op="Windowing for Training" from_port="example set output" to_op="Deep Learning" to_port="training set"/>
          <connect from_op="Windowing for Training" from_port="original" to_op="Windowing for Application" to_port="example set input"/>
          <connect from_op="Deep Learning" from_port="model" to_op="Loop" to_port="input 1"/>
          <connect from_op="Windowing for Application" from_port="example set output" to_op="Extract Example Count" to_port="example set"/>
          <connect from_op="Extract Example Count" from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Remember" to_port="store"/>
          <connect from_op="Loop" from_port="output 1" to_op="Append" to_port="example set 1"/>
          <connect from_op="Append" from_port="merged set" to_op="Join" to_port="left"/>
          <connect from_op="Read Excel (2)" from_port="output" to_op="Join" to_port="right"/>
          <connect from_op="Join" from_port="join" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    I hope it will help you in your project.

     

    Regards,

     

    Lionel

     

     

     

     

  • maurits_freriksmaurits_freriks Member Posts: 28 Contributor I

    Hi @lionelderkrikor

     

    Thanks for helping me out! 

    Correct me if I'm wrong: the graph you showed is not really accurate, right? Or do you think this is the best possible way to predict? Should a NN give a better result? If I try to run this with an optimization operator it takes me days because my device is too slow; how about yours?

     

    Regards,

     

    Maurits Freriks 

  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn

    Hi @maurits_freriks,

     

    You're right, the graph I showed is of course not the best possible way to predict, only a lead to follow; however

    "who on Earth can boast of finding the best possible way to predict ......?"....

    More seriously, I have the same problem as you: the optimization process takes too long. I only optimize one parameter at a time.

    With Neural Networks, I don't get good results:

     - the predictive curve is constant (horizontal line) or

     - the predictive curve increases in the second part of the test period (so the predictive curve moves away from the test curve).

    For the moment, the best fit I have found is still with Deep Learning (n_epochs = 3.3) and Window size = 82 / Horizon = 1.

    Here the curve(s) : 

    predictions_maintenance_2.png

    and here the associated process : 

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      Time-series forecasting process (RapidMiner process XML, version 8.0.001).

      Outline of the main process:
        1. "Read Excel" loads the training sheet and "Set Predictions_Params" sets
           the macros WindowSize, horizon and futureDays.
        2. The "data" series is windowed and used to grid-optimise and validate an
           H2O Deep Learning model.
        3. A single windowed example is stored under the name "data"; the "Loop"
           then applies the model futureDays times, feeding each prediction back
           in as the newest window value.
        4. The appended loop outputs are joined with the test sheet on "time".

      The context defines no macros on purpose: every macro the operators read
      (WindowSize, horizon, futureDays, exampleCount) is set by an operator inside
      the process, so context values with other names or values would have no
      effect and only mislead.
    -->
    <process version="8.0.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
        <process expanded="true">
          <!-- Training data: cells A1:F274; column 0 is "time" (date_time), column 1 is "data" (real). -->
          <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel" width="90" x="45" y="85">
            <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Train.xlsx"/>
            <parameter key="imported_cell_range" value="A1:F274"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="time.true.date_time.attribute"/>
              <parameter key="1" value="data.true.real.attribute"/>
              <parameter key="2" value="C.true.attribute_value.attribute"/>
              <parameter key="3" value="D.true.attribute_value.attribute"/>
              <parameter key="4" value="E.true.attribute_value.attribute"/>
              <parameter key="5" value="F.true.attribute_value.attribute"/>
            </list>
          </operator>
          <!--
            Single place where the forecasting parameters are defined.
            NOTE: WindowSize = 82 is mirrored by hard-coded attribute names inside the
            "Loop" operator ("data-81" and the "Rename" list); change them together.
          -->
          <operator activated="true" class="subprocess" compatibility="8.0.001" expanded="true" height="82" name="Set Predictions_Params" width="90" x="179" y="85">
            <process expanded="true">
              <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Window_Size" width="90" x="45" y="34">
                <parameter key="macro" value="WindowSize"/>
                <parameter key="value" value="82"/>
              </operator>
              <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Horizon" width="90" x="179" y="34">
                <parameter key="macro" value="horizon"/>
                <parameter key="value" value="1"/>
              </operator>
              <operator activated="true" class="set_macro" compatibility="8.0.001" expanded="true" height="82" name="Set Future_Days" width="90" x="313" y="34">
                <parameter key="macro" value="futureDays"/>
                <parameter key="value" value="40"/>
              </operator>
              <connect from_port="in 1" to_op="Set Window_Size" to_port="through 1"/>
              <connect from_op="Set Window_Size" from_port="through 1" to_op="Set Horizon" to_port="through 1"/>
              <connect from_op="Set Horizon" from_port="through 1" to_op="Set Future_Days" to_port="through 1"/>
              <connect from_op="Set Future_Days" from_port="through 1" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <!-- Give "time" the id role. -->
          <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
            <parameter key="attribute_name" value="time"/>
            <parameter key="target_role" value="id"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Attribute filter: subset containing only "data". -->
          <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="85">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="data"/>
          </operator>
          <!-- Filter with the no_missing_attributes condition before windowing. -->
          <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples" width="90" x="648" y="85">
            <parameter key="condition_class" value="no_missing_attributes"/>
            <list key="filters_list"/>
          </operator>
          <!--
            Training windows: window size and horizon come from the macros above and a
            label is created from "data". No breakpoint is set here, so the process
            runs through without pausing.
          -->
          <operator activated="true" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Training" width="90" x="782" y="85">
            <parameter key="window_size" value="%{WindowSize}"/>
            <parameter key="create_label" value="true"/>
            <parameter key="label_attribute" value="data"/>
            <parameter key="horizon" value="%{horizon}"/>
          </operator>
          <!--
            Grid search over Deep Learning.epochs ([0.1;10.1;100;linear]) and
            Deep Learning.adaptive_rate (true,false). Each combination is scored by
            the sliding window validation below (training width 75, test width 50).
          -->
          <operator activated="true" class="concurrency:optimize_parameters_grid" compatibility="8.0.001" expanded="true" height="124" name="Optimize Parameters (Grid)" width="90" x="983" y="34">
            <list key="parameters">
              <parameter key="Deep Learning.epochs" value="[0.1;10.1;100;linear]"/>
              <parameter key="Deep Learning.adaptive_rate" value="true,false"/>
            </list>
            <process expanded="true">
              <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="380" y="34">
                <parameter key="training_window_width" value="75"/>
                <parameter key="test_window_width" value="50"/>
                <process expanded="true">
                  <!-- Training side: Deep Learning with two hidden layers of 50 units each. -->
                  <operator activated="true" class="h2o:deep_learning" compatibility="7.6.001" expanded="true" height="82" name="Deep Learning" width="90" x="179" y="34">
                    <enumeration key="hidden_layer_sizes">
                      <parameter key="hidden_layer_sizes" value="50"/>
                      <parameter key="hidden_layer_sizes" value="50"/>
                    </enumeration>
                    <enumeration key="hidden_dropout_ratios"/>
                    <list key="expert_parameters"/>
                    <list key="expert_parameters_"/>
                  </operator>
                  <connect from_port="training" to_op="Deep Learning" to_port="training set"/>
                  <connect from_op="Deep Learning" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <process expanded="true">
                  <!-- Testing side: apply the model to the test window and measure regression performance. -->
                  <operator activated="true" class="apply_model" compatibility="8.0.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="112" y="34">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance_regression" compatibility="8.0.001" expanded="true" height="82" name="Performance" width="90" x="246" y="34"/>
                  <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
                  <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
                  <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="input 1" to_op="Validation" to_port="training"/>
              <connect from_op="Validation" from_port="model" to_port="model"/>
              <connect from_op="Validation" from_port="averagable 1" to_port="performance"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
            </process>
          </operator>
          <!--
            Application windows: fed from the "original" port of "Windowing for
            Training" and windowed with the same window size. create_label is not set
            here. NOTE(review): label_attribute "inputYt" does not match any attribute
            read above; presumably it is unused because no label is created (confirm).
          -->
          <operator activated="true" class="series:windowing" compatibility="5.2.000" expanded="true" height="82" name="Windowing for Application" width="90" x="916" y="136">
            <parameter key="window_size" value="%{WindowSize}"/>
            <parameter key="label_attribute" value="inputYt"/>
          </operator>
          <!-- Stores a value in the macro exampleCount (presumably the number of windowed examples; confirm). -->
          <operator activated="true" class="extract_macro" compatibility="8.0.001" expanded="true" height="68" name="Extract Example Count" width="90" x="1117" y="136">
            <parameter key="macro" value="exampleCount"/>
            <list key="additional_macros"/>
          </operator>
          <!-- first_example = last_example = exampleCount, so exactly one example is kept as the loop seed. -->
          <operator activated="true" class="filter_example_range" compatibility="8.0.001" expanded="true" height="82" name="Filter Example Range" width="90" x="1251" y="136">
            <parameter key="first_example" value="%{exampleCount}"/>
            <parameter key="last_example" value="%{exampleCount}"/>
          </operator>
          <!-- Store the seed window under the name "data"; the Loop recalls it by that name. -->
          <operator activated="true" class="remember" compatibility="8.0.001" expanded="true" height="68" name="Remember" width="90" x="1385" y="136">
            <parameter key="name" value="data"/>
          </operator>
          <!-- Test data: cells A1:B32; column 0 is "time" (date_time), column 1 is "data" (numeric). -->
          <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Read Excel (2)" width="90" x="916" y="238">
            <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\TimeData_Maintenance\Rapidminer\Test.xlsx"/>
            <parameter key="imported_cell_range" value="A1:B32"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="time.true.date_time.attribute"/>
              <parameter key="1" value="data.true.numeric.attribute"/>
            </list>
          </operator>
          <!--
            Recursive forecast, repeated futureDays times. Each iteration:
              1. recalls the window stored as "data" and applies the optimised model;
              2. sends a copy of the scored example to the loop output;
              3. adds one day to "time", drops the oldest value "data-81", renames
                 every data-N to data-(N+1) and prediction(label) to data-0;
              4. stores the shifted window as "data" for the next iteration.
            The names data-0 .. data-81 are hard-coded and match WindowSize = 82; they
            must be edited by hand if WindowSize changes.
          -->
          <operator activated="true" class="loop" compatibility="8.0.001" expanded="true" height="82" name="Loop" width="90" x="1117" y="34">
            <parameter key="iterations" value="%{futureDays}"/>
            <process expanded="true">
              <operator activated="true" class="recall" compatibility="8.0.001" expanded="true" height="68" name="Recall" width="90" x="45" y="136">
                <parameter key="name" value="data"/>
              </operator>
              <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Model" width="90" x="179" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="447" y="30"/>
              <operator activated="true" class="materialize_data" compatibility="8.0.001" expanded="true" height="82" name="Materialize Data (2)" width="90" x="179" y="165"/>
              <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="82" name="Increase Date (2)" width="90" x="313" y="187">
                <list key="function_descriptions">
                  <parameter key="time" value="date_add(time, 1, DATE_UNIT_DAY)"/>
                </list>
              </operator>
              <!-- target_role is not given here, so prediction(label) receives the operator's default role. -->
              <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role (2)" width="90" x="447" y="187">
                <parameter key="attribute_name" value="prediction(label)"/>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Inverted single-attribute selection: removes "data-81" and keeps everything else. -->
              <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="179" y="289">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="data-81"/>
                <parameter key="invert_selection" value="true"/>
              </operator>
              <!-- Shift the window by one position: data-80 to data-81, ..., data-0 to data-1, prediction(label) to data-0. -->
              <operator activated="true" class="rename" compatibility="8.0.001" expanded="true" height="82" name="Rename" width="90" x="313" y="289">
                <parameter key="old_name" value="data-80"/>
                <parameter key="new_name" value="data-81"/>
                <list key="rename_additional_attributes">
                  <parameter key="data-79" value="data-80"/>
                  <parameter key="data-78" value="data-79"/>
                  <parameter key="data-77" value="data-78"/>
                  <parameter key="data-76" value="data-77"/>
                  <parameter key="data-75" value="data-76"/>
                  <parameter key="data-74" value="data-75"/>
                  <parameter key="data-73" value="data-74"/>
                  <parameter key="data-72" value="data-73"/>
                  <parameter key="data-71" value="data-72"/>
                  <parameter key="data-70" value="data-71"/>
                  <parameter key="data-69" value="data-70"/>
                  <parameter key="data-68" value="data-69"/>
                  <parameter key="data-67" value="data-68"/>
                  <parameter key="data-66" value="data-67"/>
                  <parameter key="data-65" value="data-66"/>
                  <parameter key="data-64" value="data-65"/>
                  <parameter key="data-63" value="data-64"/>
                  <parameter key="data-62" value="data-63"/>
                  <parameter key="data-61" value="data-62"/>
                  <parameter key="data-60" value="data-61"/>
                  <parameter key="data-59" value="data-60"/>
                  <parameter key="data-58" value="data-59"/>
                  <parameter key="data-57" value="data-58"/>
                  <parameter key="data-56" value="data-57"/>
                  <parameter key="data-55" value="data-56"/>
                  <parameter key="data-54" value="data-55"/>
                  <parameter key="data-53" value="data-54"/>
                  <parameter key="data-52" value="data-53"/>
                  <parameter key="data-51" value="data-52"/>
                  <parameter key="data-50" value="data-51"/>
                  <parameter key="data-49" value="data-50"/>
                  <parameter key="data-48" value="data-49"/>
                  <parameter key="data-47" value="data-48"/>
                  <parameter key="data-46" value="data-47"/>
                  <parameter key="data-45" value="data-46"/>
                  <parameter key="data-44" value="data-45"/>
                  <parameter key="data-43" value="data-44"/>
                  <parameter key="data-42" value="data-43"/>
                  <parameter key="data-41" value="data-42"/>
                  <parameter key="data-40" value="data-41"/>
                  <parameter key="data-39" value="data-40"/>
                  <parameter key="data-38" value="data-39"/>
                  <parameter key="data-37" value="data-38"/>
                  <parameter key="data-36" value="data-37"/>
                  <parameter key="data-35" value="data-36"/>
                  <parameter key="data-34" value="data-35"/>
                  <parameter key="data-33" value="data-34"/>
                  <parameter key="data-32" value="data-33"/>
                  <parameter key="data-31" value="data-32"/>
                  <parameter key="data-30" value="data-31"/>
                  <parameter key="data-29" value="data-30"/>
                  <parameter key="data-28" value="data-29"/>
                  <parameter key="data-27" value="data-28"/>
                  <parameter key="data-26" value="data-27"/>
                  <parameter key="data-25" value="data-26"/>
                  <parameter key="data-24" value="data-25"/>
                  <parameter key="data-23" value="data-24"/>
                  <parameter key="data-22" value="data-23"/>
                  <parameter key="data-21" value="data-22"/>
                  <parameter key="data-20" value="data-21"/>
                  <parameter key="data-19" value="data-20"/>
                  <parameter key="data-18" value="data-19"/>
                  <parameter key="data-17" value="data-18"/>
                  <parameter key="data-16" value="data-17"/>
                  <parameter key="data-15" value="data-16"/>
                  <parameter key="data-14" value="data-15"/>
                  <parameter key="data-13" value="data-14"/>
                  <parameter key="data-12" value="data-13"/>
                  <parameter key="data-11" value="data-12"/>
                  <parameter key="data-10" value="data-11"/>
                  <parameter key="data-9" value="data-10"/>
                  <parameter key="data-8" value="data-9"/>
                  <parameter key="data-7" value="data-8"/>
                  <parameter key="data-6" value="data-7"/>
                  <parameter key="data-5" value="data-6"/>
                  <parameter key="data-4" value="data-5"/>
                  <parameter key="data-3" value="data-4"/>
                  <parameter key="data-2" value="data-3"/>
                  <parameter key="data-1" value="data-2"/>
                  <parameter key="data-0" value="data-1"/>
                  <parameter key="prediction(label)" value="data-0"/>
                </list>
              </operator>
              <operator activated="true" class="remember" compatibility="8.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="447" y="289">
                <parameter key="name" value="data"/>
              </operator>
              <connect from_port="input 1" to_op="Apply Model" to_port="model"/>
              <connect from_op="Recall" from_port="result" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_port="output 1"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Materialize Data (2)" to_port="example set input"/>
              <connect from_op="Materialize Data (2)" from_port="example set output" to_op="Increase Date (2)" to_port="example set input"/>
              <connect from_op="Increase Date (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
              <connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
              <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
              <connect from_op="Rename" from_port="example set output" to_op="Remember (2)" to_port="store"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Combine the per-iteration loop outputs, then join them with the test data on "time". -->
          <operator activated="true" class="append" compatibility="8.0.001" expanded="true" height="82" name="Append" width="90" x="1251" y="34"/>
          <operator activated="true" class="join" compatibility="8.0.001" expanded="true" height="82" name="Join" width="90" x="1385" y="34">
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="time" value="time"/>
            </list>
          </operator>
          <connect from_op="Read Excel" from_port="output" to_op="Set Predictions_Params" to_port="in 1"/>
          <connect from_op="Set Predictions_Params" from_port="out 1" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Windowing for Training" to_port="example set input"/>
          <connect from_op="Windowing for Training" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
          <connect from_op="Windowing for Training" from_port="original" to_op="Windowing for Application" to_port="example set input"/>
          <connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 3"/>
          <connect from_op="Optimize Parameters (Grid)" from_port="model" to_op="Loop" to_port="input 1"/>
          <connect from_op="Optimize Parameters (Grid)" from_port="parameter set" to_port="result 2"/>
          <connect from_op="Windowing for Application" from_port="example set output" to_op="Extract Example Count" to_port="example set"/>
          <connect from_op="Extract Example Count" from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Remember" to_port="store"/>
          <connect from_op="Read Excel (2)" from_port="output" to_op="Join" to_port="right"/>
          <connect from_op="Loop" from_port="output 1" to_op="Append" to_port="example set 1"/>
          <connect from_op="Append" from_port="merged set" to_op="Join" to_port="left"/>
          <connect from_op="Join" from_port="join" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <portSpacing port="sink_result 4" spacing="0"/>
        </process>
      </operator>
    </process>

    I hope it will be helpful,

     

    Regards,

     

    Lionel

  • Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn

    Try filtering out the downward spikes and running the model again. I think they're really messing with the analysis. 

Sign In or Register to comment.