RapidMiner 9.7 is Now Available

Lots of amazing new improvements including true version control! Learn more about what's new here.

CLICK HERE TO DOWNLOAD

"Is there any way to log Linear Regression Models?"

wesselwessel Member Posts: 537  Guru
edited June 2019 in Help
Dear All,

Is there any way to log Linear Regression Models?

Currently I have a log that looks like this:
image

I wish it to include information like:
- 0.243 * mPOP098 - 0.061 * mOFF098 + 0.540 * mPOP082 - 0.001 * sOFF046 - 0.068 * sPOP018 + 0.014

Is this possible?

Best regards,

Wessel
Tagged:

Answers

  • wesselwessel Member Posts: 537  Guru
    This is the best I came up with:


    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!-- RapidMiner process: forward attribute selection wrapped around a cross-validated
         linear regression. Inside each selection round a second regression is trained, its
         attribute weights are turned into one text value, and that value is written to the
         log together with the validation performance. The log is then converted to an
         example set and stored in the repository. -->
    <process version="5.2.003">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.2.003" expanded="true" name="Process">
        <process expanded="true" height="466" width="902">
          <!-- Load the Polynomial sample data set from the repository. -->
          <operator activated="true" class="retrieve" compatibility="5.2.003" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
            <parameter key="repository_entry" value="//Samples/data/Polynomial"/>
          </operator>
          <!-- Forward selection "F": at most 5 attributes, alpha 0.0050, and a very large
               speculative_rounds value. The inner process below is run for each candidate set. -->
          <operator activated="true" class="optimize_selection_forward" compatibility="5.2.003" expanded="true" height="94" name="F" width="90" x="179" y="30">
            <parameter key="maximal_number_of_attributes" value="5"/>
            <parameter key="speculative_rounds" value="555555555"/>
            <parameter key="alpha" value="0.0050"/>
            <process expanded="true" height="484" width="882">
              <!-- Cross-validation with shuffled sampling. First subprocess: training. Second subprocess: testing. -->
              <operator activated="true" class="x_validation" compatibility="5.2.003" expanded="true" height="112" name="Validation" width="90" x="246" y="30">
                <parameter key="sampling_type" value="shuffled sampling"/>
                <process expanded="true" height="484" width="279">
                  <!-- Learner: linear regression with its own feature selection and collinear-feature elimination turned off. -->
                  <operator activated="true" class="linear_regression" compatibility="5.2.003" expanded="true" height="94" name="Linear Regression" width="90" x="112" y="30">
                    <parameter key="feature_selection" value="none"/>
                    <parameter key="eliminate_colinear_features" value="false"/>
                  </operator>
                  <connect from_port="training" to_op="Linear Regression" to_port="training set"/>
                  <connect from_op="Linear Regression" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <process expanded="true" height="484" width="435">
                  <!-- Apply the trained model to the test fold and compute regression performance "P" with the correlation criterion enabled. -->
                  <operator activated="true" class="apply_model" compatibility="5.2.003" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance_regression" compatibility="5.2.003" expanded="true" height="76" name="P" width="90" x="179" y="30">
                    <parameter key="correlation" value="true"/>
                  </operator>
                  <connect from_port="model" to_op="Apply Model" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
                  <connect from_op="Apply Model" from_port="labelled data" to_op="P" to_port="labelled data"/>
                  <connect from_op="P" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <!-- "LR": a second linear regression with no parameters set here, fed from the
                   "training" output of Validation. Only its "weights" output is used below. -->
              <operator activated="true" class="linear_regression" compatibility="5.2.003" expanded="true" height="94" name="LR" width="90" x="112" y="165"/>
              <!-- "DTW": converts the attribute weights of LR into an example set. -->
              <operator activated="true" class="weights_to_data" compatibility="5.2.003" expanded="true" height="60" name="DTW" width="90" x="45" y="300"/>
              <!-- "str": builds the text attribute WA as str(Weight+0.0000001), then " * ", then
                   the Attribute value, then two spaces. keep_all is false.
                   NOTE(review): presumably this leaves WA as the only regular attribute; confirm. -->
              <operator activated="true" class="generate_attributes" compatibility="5.2.003" expanded="true" height="76" name="str" width="90" x="313" y="345">
                <list key="function_descriptions">
                  <parameter key="WA" value="concat(concat(str(Weight+0.0000001), &quot; * &quot;), concat(Attribute, &quot;  &quot;))"/>
                </list>
                <parameter key="use_standard_constants" value="false"/>
                <parameter key="keep_all" value="false"/>
              </operator>
              <!-- Transpose the WA table, then add attribute A initialised to a single space; A is the accumulator for the merged text. -->
              <operator activated="true" class="transpose" compatibility="5.2.003" expanded="true" height="76" name="Transpose" width="90" x="246" y="210"/>
              <operator activated="true" class="generate_attributes" compatibility="5.2.003" expanded="true" height="76" name="Generate Attributes" width="90" x="380" y="210">
                <list key="function_descriptions">
                  <parameter key="A" value="&quot; &quot;"/>
                </list>
                <parameter key="use_standard_constants" value="false"/>
              </operator>
              <!-- "Merge": attribute loop with a single-attribute filter on A and invert_selection
                   set to true; the current attribute name is exposed as macro att_1 and its value
                   is appended to A via concat. -->
              <operator activated="true" class="loop_attributes" compatibility="5.2.003" expanded="true" height="60" name="Merge" width="90" x="581" y="255">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="A"/>
                <parameter key="invert_selection" value="true"/>
                <parameter key="iteration_macro" value="att_1"/>
                <process expanded="true" height="484" width="930">
                  <operator activated="true" class="generate_attributes" compatibility="5.2.003" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="447" y="30">
                    <list key="function_descriptions">
                      <parameter key="A" value="concat(A, %{att_1})"/>
                    </list>
                    <parameter key="use_standard_constants" value="false"/>
                  </operator>
                  <connect from_port="example set" to_op="Generate Attributes (2)" to_port="example set input"/>
                  <connect from_op="Generate Attributes (2)" from_port="example set output" to_port="example set"/>
                  <portSpacing port="source_example set" spacing="0"/>
                  <portSpacing port="sink_example set" spacing="0"/>
                </process>
              </operator>
              <!-- "A1": exposes the value of attribute A at example_index 1 so the Log operator can read it as data_value. -->
              <operator activated="true" class="extract_log_value" compatibility="5.2.003" expanded="true" height="60" name="A1" width="90" x="782" y="300">
                <parameter key="attribute_name" value="A"/>
                <parameter key="example_index" value="1"/>
              </operator>
              <!-- "Log": one row per execution with values read from F (number of attributes,
                   feature_names), from Validation (performance1, performance2, deviation) and
                   from A1 (data_value, the merged weight text). -->
              <operator activated="true" class="log" compatibility="5.2.003" expanded="true" height="94" name="Log" width="90" x="715" y="75">
                <list key="log">
                  <parameter key="n" value="operator.F.value.number of attributes"/>
                  <parameter key="f" value="operator.F.value.feature_names"/>
                  <parameter key="p" value="operator.Validation.value.performance1"/>
                  <parameter key="c" value="operator.Validation.value.performance2"/>
                  <parameter key="d" value="operator.Validation.value.deviation"/>
                  <parameter key="A1" value="operator.A1.value.data_value"/>
                </list>
              </operator>
              <!-- Wiring: the validation performance passes through Log (through 1) to the
                   performance sink of F; the weight-text chain ends at Log (through 2), which
                   places A1 upstream of Log. -->
              <connect from_port="example set" to_op="Validation" to_port="training"/>
              <connect from_op="Validation" from_port="training" to_op="LR" to_port="training set"/>
              <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
              <connect from_op="LR" from_port="weights" to_op="DTW" to_port="attribute weights"/>
              <connect from_op="DTW" from_port="example set" to_op="str" to_port="example set input"/>
              <connect from_op="str" from_port="example set output" to_op="Transpose" to_port="example set input"/>
              <connect from_op="Transpose" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
              <connect from_op="Generate Attributes" from_port="example set output" to_op="Merge" to_port="example set"/>
              <connect from_op="Merge" from_port="example set" to_op="A1" to_port="example set"/>
              <connect from_op="A1" from_port="example set" to_op="Log" to_port="through 2"/>
              <connect from_op="Log" from_port="through 1" to_port="performance"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
            </process>
          </operator>
          <!-- Reduce the data using the attribute weights produced by F, then cross-validate a linear regression on the result. -->
          <operator activated="true" class="select_by_weights" compatibility="5.2.003" expanded="true" height="94" name="Select by Weights" width="90" x="315" y="30"/>
          <operator activated="true" class="x_validation" compatibility="5.2.003" expanded="true" height="112" name="Validation (2)" width="90" x="450" y="30">
            <parameter key="sampling_type" value="shuffled sampling"/>
            <process expanded="true" height="484" width="427">
              <operator activated="true" class="linear_regression" compatibility="5.2.003" expanded="true" height="94" name="Linear Regression (2)" width="90" x="168" y="30">
                <parameter key="feature_selection" value="none"/>
                <parameter key="eliminate_colinear_features" value="false"/>
              </operator>
              <connect from_port="training" to_op="Linear Regression (2)" to_port="training set"/>
              <connect from_op="Linear Regression (2)" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true" height="484" width="427">
              <operator activated="true" class="apply_model" compatibility="5.2.003" expanded="true" height="76" name="Apply Model (2)" width="90" x="45" y="30">
                <list key="application_parameters"/>
              </operator>
              <!-- Unlike "P" inside F, this performance operator uses correlation as main criterion and disables root_mean_squared_error. -->
              <operator activated="true" class="performance_regression" compatibility="5.2.003" expanded="true" height="76" name="P (2)" width="90" x="180" y="30">
                <parameter key="main_criterion" value="correlation"/>
                <parameter key="root_mean_squared_error" value="false"/>
                <parameter key="correlation" value="true"/>
              </operator>
              <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
              <connect from_op="Apply Model (2)" from_port="labelled data" to_op="P (2)" to_port="labelled data"/>
              <connect from_op="P (2)" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <!-- Apply the model delivered by Validation (2) to the data from its "training" output port. -->
          <operator activated="true" class="apply_model" compatibility="5.2.003" expanded="true" height="76" name="Apply Model (3)" width="90" x="585" y="30">
            <list key="application_parameters"/>
          </operator>
          <!-- Convert the rows collected by the operator named "Log" into an example set. -->
          <operator activated="true" class="log_to_data" compatibility="5.2.003" expanded="true" height="94" name="Log to Data" width="90" x="246" y="165">
            <parameter key="log_name" value="Log"/>
          </operator>
          <!-- Store the log table in the repository.
               NOTE(review): the path uses macro R, but the macros element of this process is
               empty; confirm that R is defined elsewhere before running. -->
          <operator activated="true" class="store" compatibility="5.2.003" expanded="true" height="60" name="Store" width="90" x="447" y="210">
            <parameter key="repository_entry" value="res/%{R}_RESULT"/>
          </operator>
          <!-- Top-level wiring. The performance output of F is routed through Log to Data
               (through 1), which places Log to Data downstream of F. -->
          <connect from_op="Retrieve" from_port="output" to_op="F" to_port="example set"/>
          <connect from_op="F" from_port="example set" to_op="Select by Weights" to_port="example set input"/>
          <connect from_op="F" from_port="attribute weights" to_op="Select by Weights" to_port="weights"/>
          <connect from_op="F" from_port="performance" to_op="Log to Data" to_port="through 1"/>
          <connect from_op="Select by Weights" from_port="example set output" to_op="Validation (2)" to_port="training"/>
          <connect from_op="Validation (2)" from_port="model" to_op="Apply Model (3)" to_port="model"/>
          <connect from_op="Validation (2)" from_port="training" to_op="Apply Model (3)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (3)" from_port="labelled data" to_port="result 1"/>
          <connect from_op="Apply Model (3)" from_port="model" to_port="result 2"/>
          <connect from_op="Log to Data" from_port="exampleSet" to_op="Store" to_port="input"/>
          <connect from_op="Store" from_port="through" to_port="result 3"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="36"/>
          <portSpacing port="sink_result 4" spacing="54"/>
        </process>
      </operator>
    </process>
  • fritmorefritmore Member Posts: 90  Maven
    You should be able to build that expression in the Generate Attributes operator.
    I don't know how to append it to the log automatically, though.
  • wesselwessel Member Posts: 537  Guru
    Dear All,

    I really need some way to use the numbers inside a linear regression model.
    Like Coefficient, Std. Error, etc.
    http://i.snag.gy/W82gm.jpg
    image

    Any way to do this?
    Maybe using the R plugin?
    Or write model to text file, load text file, and read?
    Maybe using scripting?
    Maybe I should edit the RM source code?

    Best regards,

    Wessel
  • MariusHelfMariusHelf RapidMiner Certified Expert, Member Posts: 1,869   Unicorn
    Hi Wessel,

    You can use the Execute Script operator to access the guts of a model. In your case, the class of the model is LinearRegressionModel. The attached process demonstrates how to access it via the scripting operator. The script currently just writes the data to standard output. To use the values in your process, you should create an example set from the data and pass it to the operator output.

    Regards,
    Marius
  • MariusHelfMariusHelf RapidMiner Certified Expert, Member Posts: 1,869   Unicorn
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!-- RapidMiner demo process: trains a linear regression on the Polynomial sample data
         and passes the model to an Execute Script operator that prints its coefficients. -->
    <process version="5.3.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.3.000" expanded="true" name="Process">
        <process expanded="true" height="392" width="567">
          <!-- Load the Polynomial sample data set from the repository. -->
          <operator activated="true" class="retrieve" compatibility="5.3.000" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
            <parameter key="repository_entry" value="//Samples/data/Polynomial"/>
          </operator>
          <!-- Learner: linear regression with its own feature selection and collinear-feature elimination turned off. -->
          <operator activated="true" class="linear_regression" compatibility="5.3.000" expanded="true" height="94" name="Linear Regression (2)" width="90" x="179" y="30">
            <parameter key="feature_selection" value="none"/>
            <parameter key="eliminate_colinear_features" value="false"/>
          </operator>
          <!-- The script imports LinearRegressionModel, reads the model from input[0] and
               prints model.getCoefficients() to standard output. It has no return statement. -->
          <operator activated="true" class="execute_script" compatibility="5.3.000" expanded="true" height="76" name="Execute Script" width="90" x="313" y="30">
            <parameter key="script" value="import com.rapidminer.operator.learner.functions.LinearRegressionModel;&#10;&#10;LinearRegressionModel model = input[0];&#10;&#10;System.out.println(model.getCoefficients());&#10;"/>
          </operator>
          <!-- Wiring: Retrieve feeds the learner, the learner's model goes to script port "input 1", and script "output 1" goes to result 1. -->
          <connect from_op="Retrieve" from_port="output" to_op="Linear Regression (2)" to_port="training set"/>
          <connect from_op="Linear Regression (2)" from_port="model" to_op="Execute Script" to_port="input 1"/>
          <connect from_op="Execute Script" from_port="output 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
  • wesselwessel Member Posts: 537  Guru
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!-- RapidMiner process: builds a one-row example set as a container, trains a linear
         regression on the Polynomial sample data, and uses Execute Script to write the
         model coefficients into that row so they are available as data. -->
    <process version="5.2.008">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
        <process expanded="true" height="450" width="614">
          <!-- Container row: one example with 7 generated attributes, values bounded between 33.0 and 33.00001.
               NOTE(review): the script below writes to att1, att2, ...; presumably these match the
               generated attribute names. Confirm. -->
          <operator activated="true" class="generate_data" compatibility="5.2.008" expanded="true" height="60" name="Generate Data" width="90" x="45" y="165">
            <parameter key="number_examples" value="1"/>
            <parameter key="number_of_attributes" value="7"/>
            <parameter key="attributes_lower_bound" value="33.0"/>
            <parameter key="attributes_upper_bound" value="33.00001"/>
          </operator>
          <!-- Add attributes str1 and str2, both initialised with str(1); the script overwrites them. -->
          <operator activated="true" class="generate_attributes" compatibility="5.2.008" expanded="true" height="76" name="Generate Attributes" width="90" x="179" y="165">
            <list key="function_descriptions">
              <parameter key="str1" value="str(1)"/>
              <parameter key="str2" value="str(1)"/>
            </list>
          </operator>
          <!-- Load the Polynomial sample data set from the repository. -->
          <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="246" y="30">
            <parameter key="repository_entry" value="//Samples/data/Polynomial"/>
          </operator>
          <!-- Learner: linear regression with its own feature selection and collinear-feature elimination turned off. -->
          <operator activated="true" class="linear_regression" compatibility="5.2.008" expanded="true" height="94" name="Linear Regression (2)" width="90" x="380" y="30">
            <parameter key="feature_selection" value="none"/>
            <parameter key="eliminate_colinear_features" value="false"/>
          </operator>
          <!-- The script reads the model from input[0] and the container example set from
               input[1]. For each entry of model.getCoefficients() it assigns the value to
               attribute "att" + i of example 0, with i starting at 1. It then assigns the model
               to str1 and the coefficient collection to str2, and returns the example set.
               The locals numberOfAttributes and att are assigned but never read.
               NOTE(review): the loop has no bound check against the 7 generated attributes;
               confirm that the coefficient count fits. -->
          <operator activated="true" class="execute_script" compatibility="5.2.008" expanded="true" height="94" name="Execute Script" width="90" x="447" y="165">
            <parameter key="script" value="import com.rapidminer.operator.learner.functions.LinearRegressionModel;&#10;&#10;LinearRegressionModel model = input[0];&#10;ExampleSet exampleSet = input[1];&#10;int numberOfAttributes = exampleSet.getAttributes().size();&#10;String att = &quot;att&quot;&#10;int i = 1&#10;&#10;Example e = exampleSet.getExample(0);&#10;for (Object o : model.getCoefficients()) {&#10;&#9;e[&quot;att&quot; + i] = o;&#10;&#9;i++;&#10;}&#10;&#10;e[&quot;str1&quot;] = model;&#10;e[&quot;str2&quot;] = model.getCoefficients();&#10;&#9;&#10;return exampleSet;&#10;"/>
          </operator>
          <!-- Wiring: the model goes to script port "input 1" and the container row to "input 2".
               NOTE(review): "output 2" is wired to result 2 although the script returns a single
               object; confirm whether that port delivers anything. -->
          <connect from_op="Generate Data" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Execute Script" to_port="input 2"/>
          <connect from_op="Retrieve" from_port="output" to_op="Linear Regression (2)" to_port="training set"/>
          <connect from_op="Linear Regression (2)" from_port="model" to_op="Execute Script" to_port="input 1"/>
          <connect from_op="Execute Script" from_port="output 1" to_port="result 1"/>
          <connect from_op="Execute Script" from_port="output 2" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
  • MariusHelfMariusHelf RapidMiner Certified Expert, Member Posts: 1,869   Unicorn
    Thanks for sharing!
Sign In or Register to comment.