Options

Unable to optimize parameters in Fit Trend-SVM model

mmaragmmarag Member Posts: 35 Maven
edited November 2018 in Help

Hello,

I am trying to optimize the SVM parameter C using grid optimization in an attempt to fit the trend of a stock market as well as possible. I get the error message: Trend(label) attribute is duplicate. What am I doing wrong?

 

here is the process:

<?xml version="1.0" encoding="UTF-8"?><process version="7.3.000-BETA">
<!-- RapidMiner process: reads an Excel sheet, sorts it, windows the series and then runs a grid search over SVM.C around a Fit Trend operator. -->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.3.000-BETA" expanded="true" name="Process">
<process expanded="true">
<!-- Load cell range A1:I1670 from the workbook; the path is an absolute path on the author's machine. Attribute names and value types are declared in data_set_meta_data_information. -->
<operator activated="true" class="read_excel" compatibility="7.3.000-BETA" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
<parameter key="excel_file" value="C:\Users\Manolis\Desktop\Book1.xlsx"/>
<parameter key="imported_cell_range" value="A1:I1670"/>
<parameter key="first_row_as_names" value="false"/>
<list key="annotations">
<parameter key="0" value="Name"/>
</list>
<list key="data_set_meta_data_information">
<parameter key="0" value="Trade Date.true.polynominal.attribute"/>
<parameter key="1" value="High.true.real.attribute"/>
<parameter key="2" value="Low.true.real.attribute"/>
<parameter key="3" value="Open.true.real.attribute"/>
<parameter key="4" value="Close.true.numeric.attribute"/>
<parameter key="5" value="Volume.true.integer.attribute"/>
<parameter key="6" value="Prev\. Close.true.numeric.attribute"/>
<parameter key="7" value="Total Turnover.true.numeric.attribute"/>
<parameter key="8" value="Num\. Of Trans\..true.integer.attribute"/>
</list>
</operator>
<!-- Generate ID feeds Sort, which orders the rows by the "id" attribute in decreasing order (presumably to reverse the sheet's row order; confirm against the data). -->
<operator activated="true" class="generate_id" compatibility="7.3.000-BETA" expanded="true" height="82" name="Generate ID" width="90" x="112" y="187"/>
<operator activated="true" class="sort" compatibility="7.3.000-BETA" expanded="true" height="82" name="Sort" width="90" x="313" y="187">
<parameter key="attribute_name" value="id"/>
<parameter key="sorting_direction" value="decreasing"/>
</operator>
<!-- Keep only the listed subset of attributes; Trade Date and Prev. Close are not in the list. -->
<operator activated="true" class="select_attributes" compatibility="7.3.000-BETA" expanded="true" height="82" name="Select Attributes" width="90" x="313" y="442">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="Volume|Total Turnover|Open|Num. Of Trans.|Low|High|Close"/>
</operator>
<!-- Windowing with window_size 5; create_label is on and the label is taken from the Close attribute. -->
<operator activated="true" class="series:windowing" compatibility="7.2.000" expanded="true" height="82" name="Windowing" width="90" x="447" y="544">
<parameter key="window_size" value="5"/>
<parameter key="create_label" value="true"/>
<parameter key="label_attribute" value="Close"/>
</operator>
<!-- Grid search over the C parameter of the operator named "SVM". The value [0;1;100;linear] is presumably min;max;steps;scale (confirm in the operator documentation). -->
<!-- NOTE(review): the reported stack trace shows "Duplicate attribute name: trend(label)" thrown from Trend.doWork while the parameter-iterating chain executes this subprocess. Presumably the trend(label) attribute added in one iteration is still present on the example set in the next one; confirm before changing the wiring. -->
<operator activated="true" class="optimize_parameters_grid" compatibility="7.3.000-BETA" expanded="true" height="103" name="Optimize Parameters (Grid)" width="90" x="648" y="544">
<list key="parameters">
<parameter key="SVM.C" value="[0;1;100;linear]"/>
</list>
<process expanded="true">
<!-- Fit Trend on the attribute named "label". Only the SVM learner inside is activated and connected; the GLM, Neural Net and Deep Learning operators are deactivated. -->
<operator activated="true" class="series:fit_trend" compatibility="7.2.000" expanded="true" height="68" name="Fit Trend" width="90" x="313" y="289">
<parameter key="attribute" value="label"/>
<process expanded="true">
<operator activated="false" class="h2o:generalized_linear_model" compatibility="7.3.000-BETA" expanded="true" height="103" name="Generalized Linear Model (2)" width="90" x="179" y="238">
<list key="beta_constraints"/>
<list key="expert_parameters"/>
</operator>
<operator activated="false" class="neural_net" compatibility="7.3.000-BETA" expanded="true" height="82" name="Neural Net" width="90" x="380" y="289">
<list key="hidden_layers"/>
</operator>
<!-- Radial-kernel SVM; the C value set here is the one the enclosing grid search overrides via the "SVM.C" key. -->
<operator activated="true" class="support_vector_machine" compatibility="7.3.000-BETA" expanded="true" height="124" name="SVM" width="90" x="514" y="391">
<parameter key="kernel_type" value="radial"/>
<parameter key="kernel_gamma" value="5.0"/>
<parameter key="kernel_cache" value="200000"/>
<parameter key="C" value="0.01"/>
<parameter key="convergence_epsilon" value="0.01"/>
<parameter key="max_iterations" value="10000000"/>
<parameter key="L_pos" value="3.0"/>
<parameter key="L_neg" value="3.0"/>
<parameter key="epsilon" value="0.01"/>
<parameter key="epsilon_plus" value="0.01"/>
</operator>
<operator activated="false" class="h2o:deep_learning" compatibility="7.3.000-BETA" expanded="true" height="82" name="Deep Learning (2)" width="90" x="313" y="136">
<parameter key="activation" value="Tanh"/>
<enumeration key="hidden_layer_sizes">
<parameter key="hidden_layer_sizes" value="3"/>
<parameter key="hidden_layer_sizes" value="2"/>
<parameter key="hidden_layer_sizes" value="3"/>
<parameter key="hidden_layer_sizes" value="2"/>
</enumeration>
<enumeration key="hidden_dropout_ratios"/>
<list key="expert_parameters"/>
<list key="expert_parameters_"/>
</operator>
<connect from_port="example set" to_op="SVM" to_port="training set"/>
<connect from_op="SVM" from_port="model" to_port="model"/>
<portSpacing port="source_example set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
</process>
</operator>
<!-- Give the trend(label) attribute the prediction role so the regression performance operator below can use it. -->
<operator activated="true" class="set_role" compatibility="7.3.000-BETA" expanded="true" height="82" name="Set Role" width="90" x="514" y="187">
<parameter key="attribute_name" value="trend(label)"/>
<parameter key="target_role" value="prediction"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="performance_regression" compatibility="7.3.000-BETA" expanded="true" height="82" name="Performance (3)" width="90" x="715" y="340"/>
<!-- Inner wiring: input 1, then Fit Trend, Set Role, Performance (3), and out through the performance port. -->
<connect from_port="input 1" to_op="Fit Trend" to_port="example set"/>
<connect from_op="Fit Trend" from_port="example set with trend" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Performance (3)" to_port="labelled data"/>
<connect from_op="Performance (3)" from_port="performance" to_port="performance"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_performance" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
<!-- Top-level wiring: Read Excel, Generate ID, Sort, Select Attributes, Windowing, Optimize Parameters (Grid); the optimizer's performance output is delivered as result 1. -->
<connect from_op="Read Excel" from_port="output" to_op="Generate ID" to_port="example set input"/>
<connect from_op="Generate ID" from_port="example set output" to_op="Sort" to_port="example set input"/>
<connect from_op="Sort" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Windowing" to_port="example set input"/>
<connect from_op="Windowing" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
<connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>

 

Thanks

MM

 

PS: here is the stack trace

Exception: java.lang.IllegalArgumentException
Message: Duplicate attribute name: trend(label)
Stack trace:

com.rapidminer.example.SimpleAttributes.register(SimpleAttributes.java:124)
com.rapidminer.example.SimpleAttributes.add(SimpleAttributes.java:203)
com.rapidminer.example.AbstractAttributes.addRegular(AbstractAttributes.java:94)
com.rapidminer.operator.preprocessing.series.filter.Trend.doWork(Trend.java:153)
com.rapidminer.operator.Operator.execute(Operator.java:1005)
com.rapidminer.operator.execution.SimpleUnitExecutor.execute(SimpleUnitExecutor.java:76)
com.rapidminer.operator.ExecutionUnit$3.run(ExecutionUnit.java:812)
com.rapidminer.operator.ExecutionUnit$3.run(ExecutionUnit.java:807)
java.security.AccessController.doPrivileged(Native Method)
com.rapidminer.operator.ExecutionUnit.execute(ExecutionUnit.java:807)
com.rapidminer.operator.meta.ParameterIteratingOperatorChain.executeSubprocess(ParameterIteratingOperatorChain.java:262)
com.rapidminer.operator.meta.ParameterIteratingOperatorChain.getPerformanceVector(ParameterIteratingOperatorChain.java:305)
com.rapidminer.operator.meta.GridSearchParameterOptimizationOperator.computeCurrentPerformance(GridSearchParameterOptimizationOperator.java:113)
com.rapidminer.operator.meta.GridSearchParameterOptimizationOperator.doWork(GridSearchParameterOptimizationOperator.java:177)
com.rapidminer.operator.Operator.execute(Operator.java:1005)
com.rapidminer.operator.execution.SimpleUnitExecutor.execute(SimpleUnitExecutor.java:76)
com.rapidminer.operator.ExecutionUnit$3.run(ExecutionUnit.java:812)
com.rapidminer.operator.ExecutionUnit$3.run(ExecutionUnit.java:807)
java.security.AccessController.doPrivileged(Native Method)
com.rapidminer.operator.ExecutionUnit.execute(ExecutionUnit.java:807)
com.rapidminer.operator.OperatorChain.doWork(OperatorChain.java:428)
com.rapidminer.operator.Operator.execute(Operator.java:1005)
com.rapidminer.Process.run(Process.java:1195)
com.rapidminer.Process.run(Process.java:1091)
com.rapidminer.Process.run(Process.java:1044)
com.rapidminer.Process.run(Process.java:1039)
com.rapidminer.Process.run(Process.java:1029)
com.rapidminer.gui.ProcessThread.run(ProcessThread.java:65)

 

Tagged:

Answers

  • Options
    Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn

    @stevefarr please move this thread to the Studio forum. This should not be in the Building Block forum.

    Thanks.

  • Options
    Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn

    Hi,

     

    I checked out your process and see a few things that are not quite correct. You vary the C by decimal increments: 1.1, 1.2, 1.3; that's not very efficient and you won't get any bump in performance. May I suggest using ten steps and varying from 0 to 10,000?

     

    In addition, with time series you have to use the Sliding Window Validation and nest it with the Forecast Performance operator, then you can optimize trend accuracy. 

     

    Update: See XML below as an example.

     

    <?xml version="1.0" encoding="UTF-8"?><process version="7.2.003">
    <!-- Example RapidMiner process: a grid search over SVM and window-width parameters, scored by sliding-window validation with a forecasting performance measure. -->
    <!-- NOTE(review): there are no top-level connect elements in this process, so no data source is wired into Optimize Parameters (Grid) here; one has to be added before it can run. -->
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="7.2.003" expanded="true" name="Process">
    <process expanded="true">
    <!-- Grid over four parameters: both Backtesting window widths plus kernel_gamma (logarithmic scale) and C (linear scale) of "SVM Building". Each value is presumably min;max;steps;scale (confirm in the operator documentation). -->
    <operator activated="true" class="optimize_parameters_grid" compatibility="7.2.003" expanded="true" height="124" name="Optimize Parameters (Grid)" width="90" x="447" y="34">
    <list key="parameters">
    <parameter key="Backtesting.training_window_width" value="[6;12;10;linear]"/>
    <parameter key="Backtesting.test_window_width" value="[6;12;10;linear]"/>
    <parameter key="SVM Building.kernel_gamma" value="[.001;1000;6;logarithmic]"/>
    <parameter key="SVM Building.C" value="[0;1000;5;linear]"/>
    </list>
    <process expanded="true">
    <!-- Sliding-window validation. The first subprocess trains the model, the second applies it to the test set and measures performance. The widths set here are among the parameters the grid search varies. -->
    <operator activated="true" class="series:sliding_window_validation" compatibility="7.2.000" expanded="true" height="124" name="Backtesting" width="90" x="179" y="34">
    <parameter key="training_window_width" value="8"/>
    <parameter key="test_window_width" value="8"/>
    <process expanded="true">
    <!-- Training side: radial-kernel SVM fed from the training port. -->
    <operator activated="true" class="support_vector_machine" compatibility="7.2.003" expanded="true" height="124" name="SVM Building" width="90" x="179" y="34">
    <parameter key="kernel_type" value="radial"/>
    <parameter key="kernel_gamma" value="0.001"/>
    </operator>
    <connect from_port="training" to_op="SVM Building" to_port="training set"/>
    <connect from_op="SVM Building" from_port="model" to_port="model"/>
    <portSpacing port="source_training" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    </process>
    <process expanded="true">
    <!-- Testing side: apply the trained model to the test set, then compute forecasting performance with horizon 1 and prediction_trend_accuracy as the main criterion. -->
    <operator activated="true" class="apply_model" compatibility="7.1.001" expanded="true" height="82" name="Apply Validation Model" width="90" x="45" y="34">
    <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="series:forecasting_performance" compatibility="7.2.000" expanded="true" height="82" name="Measure Forecast Perf" width="90" x="246" y="34">
    <parameter key="horizon" value="1"/>
    <parameter key="main_criterion" value="prediction_trend_accuracy"/>
    </operator>
    <connect from_port="model" to_op="Apply Validation Model" to_port="model"/>
    <connect from_port="test set" to_op="Apply Validation Model" to_port="unlabelled data"/>
    <connect from_op="Apply Validation Model" from_port="labelled data" to_op="Measure Forecast Perf" to_port="labelled data"/>
    <connect from_op="Measure Forecast Perf" from_port="performance" to_port="averagable 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_averagable 1" spacing="0"/>
    <portSpacing port="sink_averagable 2" spacing="0"/>
    </process>
    </operator>
    <!-- Log the trend accuracy together with the current gamma, C and window widths. The filename is an absolute path on the author's machine and needs adjusting elsewhere. -->
    <operator activated="true" class="log" compatibility="7.2.003" expanded="true" height="82" name="Log" width="90" x="313" y="85">
    <parameter key="filename" value="C:\Users\tott_000\Documents\Hv5OptimizationLog.log"/>
    <list key="log">
    <parameter key="Performance" value="operator.Measure Forecast Perf.value.prediction_trend_accuracy"/>
    <parameter key="Gamma" value="operator.SVM Building.parameter.kernel_gamma"/>
    <parameter key="C" value="operator.SVM Building.parameter.C"/>
    <parameter key="Training Window Width" value="operator.Backtesting.parameter.training_window_width"/>
    <parameter key="Testing Window Width" value="operator.Backtesting.parameter.test_window_width"/>
    </list>
    </operator>
    <!-- Inner wiring: input 1 feeds Backtesting; its model goes out as result 1 and its performance passes through Log to the performance port. -->
    <connect from_port="input 1" to_op="Backtesting" to_port="training"/>
    <connect from_op="Backtesting" from_port="model" to_port="result 1"/>
    <connect from_op="Backtesting" from_port="averagable 1" to_op="Log" to_port="through 1"/>
    <connect from_op="Log" from_port="through 1" to_port="performance"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_performance" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    </process>
    </operator>
    </process>
Sign In or Register to comment.