RapidMiner

0 Likes

logistic regression operator and weights

Status: Investigating

The operator information for the base Logistic Regression learner indicates that it does not accept weighted examples (see screenshot). However, if you test this by training a model on weighted versus unweighted examples, the resulting models are clearly different, so weighting does appear to affect this operator. See the example process here:

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.001">
  <!-- Reproduction process: trains the H2O Logistic Regression learner twice on the same
       data, once on the plain example set and once on the output of
       Generate Weight (Stratification), so that both models and performances can be compared. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the Counterparty Risk sample data from the repository. -->
      <operator activated="true" class="retrieve" compatibility="8.1.001" expanded="true" height="68" name="Retrieve Counterparty Risk Data" width="90" x="112" y="34">
        <parameter key="repository_entry" value="//Samples/Templates/Credit Risk Modeling/Counterparty Risk Data"/>
      </operator>
      <!-- Give the attribute "Default" the role "label". -->
      <operator activated="true" class="set_role" compatibility="8.1.001" expanded="true" height="82" name="Set Role" width="90" x="112" y="136">
        <parameter key="attribute_name" value="Default"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Filter on "Default.is_not_missing."; presumably this drops examples whose label is
           missing (confirm against the Filter Examples documentation). -->
      <operator activated="true" class="filter_examples" compatibility="8.1.001" expanded="true" height="103" name="Filter Examples" width="90" x="112" y="238">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="Default.is_not_missing."/>
        </list>
      </operator>
      <!-- Copy the filtered data: output 1 feeds the unweighted branch, output 2 the weighted branch. -->
      <operator activated="true" class="multiply" compatibility="8.1.001" expanded="true" height="103" name="Multiply" width="90" x="246" y="34"/>
      <!-- Unweighted branch: stratified cross-validation around Logistic Regression, which has
           no parameters set explicitly here. -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.001" expanded="true" height="145" name="Validation" width="90" x="447" y="34">
        <parameter key="sampling_type" value="stratified sampling"/>
        <process expanded="true">
          <operator activated="true" class="h2o:logistic_regression" compatibility="7.6.001" expanded="true" height="124" name="Logistic Regression" width="90" x="175" y="34"/>
          <connect from_port="training set" to_op="Logistic Regression" to_port="training set"/>
          <connect from_op="Logistic Regression" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="green" colored="true" height="80" resized="true" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (90 % of data by default, 10 times)</description>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="8.1.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" compatibility="8.1.001" expanded="true" height="82" name="Performance" width="90" x="179" y="34"/>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="left" color="blue" colored="true" height="103" resized="true" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (10 %).&lt;br/&gt;The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating a logistic regression model.</description>
      </operator>
      <!-- Weighted branch: Generate Weight (Stratification) runs with total_weight 424.0
           ahead of the second validation. -->
      <operator activated="true" class="generate_weight_stratification" compatibility="8.1.001" expanded="true" height="82" name="Generate Weight (Stratification)" width="90" x="313" y="238">
        <parameter key="total_weight" value="424.0"/>
      </operator>
      <!-- Same validation setup as the unweighted branch, applied to the output of
           Generate Weight (Stratification). -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.001" expanded="true" height="145" name="Validation (2)" width="90" x="447" y="238">
        <parameter key="sampling_type" value="stratified sampling"/>
        <process expanded="true">
          <operator activated="true" class="h2o:logistic_regression" compatibility="7.6.001" expanded="true" height="124" name="Logistic Regression (2)" width="90" x="175" y="34"/>
          <connect from_port="training set" to_op="Logistic Regression (2)" to_port="training set"/>
          <connect from_op="Logistic Regression (2)" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="green" colored="true" height="80" resized="false" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (90 % of data by default, 10 times)</description>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="8.1.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" compatibility="8.1.001" expanded="true" height="82" name="Performance (2)" width="90" x="179" y="34"/>
          <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
          <connect from_op="Performance (2)" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance (2)" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="left" color="blue" colored="true" height="103" resized="false" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (10 %).&lt;br/&gt;The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating a logistic regression model on weighted examples.</description>
      </operator>
      <!-- Wiring: results 1 to 3 come from the unweighted validation, results 4 to 6 from the
           weighted one (model, test result set, performance in each case). -->
      <connect from_op="Retrieve Counterparty Risk Data" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Validation" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Generate Weight (Stratification)" to_port="example set input"/>
      <connect from_op="Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Validation" from_port="test result set" to_port="result 2"/>
      <connect from_op="Validation" from_port="performance 1" to_port="result 3"/>
      <connect from_op="Generate Weight (Stratification)" from_port="example set output" to_op="Validation (2)" to_port="example set"/>
      <connect from_op="Validation (2)" from_port="model" to_port="result 4"/>
      <connect from_op="Validation (2)" from_port="test result set" to_port="result 5"/>
      <connect from_op="Validation (2)" from_port="performance 1" to_port="result 6"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
      <portSpacing port="sink_result 7" spacing="0"/>
    </process>
  </operator>
</process>

Can the operator information be updated or clarified?  Thanks.

2 Comments (2 New)
Comments
Community Manager
Status: Investigating

Got it @Telcontar120. Pushing to dev team. Stay tuned.

Community Manager

Confirmed as an issue. Ticket created.