Regression via classification

wesselwessel Member Posts: 537 Maven
edited November 2018 in Help
Hi,

I wish to run a classification algorithm on a regression task.
I generated a new attribute, so the label attribute is now discrete.
Unfortunately, I can no longer straightforwardly apply the Performance (Regression) operator.

I have to compute the absolute error myself using generate attributes.
Since I'm applying attribute selection, I'm doing this over and over again, which is really slow.
Is there a faster way to achieve this result?

Best regards,

Wessel

I have attached an example process below:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Regression via classification: a k-NN classifier is cross-validated on a
  discretised label, then the nominal prediction is parsed back to a number so
  that Performance (Regression) can report the absolute error.

  The outer "Process" operator below was added so that the file has a root
  operator of class "process"; the original listing started directly with
  Loop Subsets.
  NOTE(review): no data source is part of this listing. Connect an example set
  that contains leadTime (label) and leadTime_numeric to the "example set"
  input of Loop Subsets before running.
-->
<process version="7.0.001">
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Loops over attribute subsets; configured for a minimum of 1 and a maximum of 2 attributes. -->
      <operator activated="true" class="loop_attribute_subsets" compatibility="7.0.001" expanded="true" height="68" name="Loop Subsets" width="90" x="179" y="34">
        <parameter key="use_exact_number" value="false"/>
        <parameter key="exact_number_of_attributes" value="-1"/>
        <parameter key="min_number_of_attributes" value="1"/>
        <parameter key="limit_max_number" value="true"/>
        <parameter key="max_number_of_attributes" value="2"/>
        <process expanded="true">
          <!-- X-Prediction: 10 validations with stratified sampling; outputs the labelled data. -->
          <operator activated="true" class="x_prediction" compatibility="7.0.001" expanded="true" height="68" name="X-Prediction" width="90" x="45" y="34">
            <parameter key="leave_one_out" value="false"/>
            <parameter key="number_of_validations" value="10"/>
            <parameter key="sampling_type" value="stratified sampling"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <!-- Training side: learn a k-NN model with k = 3. -->
            <process expanded="true">
              <operator activated="true" class="k_nn" compatibility="7.0.001" expanded="true" height="82" name="k-NN" width="90" x="45" y="187">
                <parameter key="k" value="3"/>
                <parameter key="weighted_vote" value="false"/>
                <parameter key="measure_types" value="MixedMeasures"/>
                <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
                <parameter key="nominal_measure" value="NominalDistance"/>
                <parameter key="numerical_measure" value="EuclideanDistance"/>
                <parameter key="divergence" value="GeneralizedIDivergence"/>
                <parameter key="kernel_type" value="radial"/>
                <parameter key="kernel_gamma" value="1.0"/>
                <parameter key="kernel_sigma1" value="1.0"/>
                <parameter key="kernel_sigma2" value="0.0"/>
                <parameter key="kernel_sigma3" value="2.0"/>
                <parameter key="kernel_degree" value="3.0"/>
                <parameter key="kernel_shift" value="1.0"/>
                <parameter key="kernel_a" value="1.0"/>
                <parameter key="kernel_b" value="0.0"/>
              </operator>
              <connect from_port="training" to_op="k-NN" to_port="training set"/>
              <connect from_op="k-NN" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <!-- Application side: apply the trained model to the unlabelled data. -->
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="7.0.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="85">
                <list key="application_parameters"/>
                <parameter key="create_view" value="false"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="unlabelled data" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_port="labelled data"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_unlabelled data" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_labelled data" spacing="0"/>
            </process>
          </operator>
          <!-- CP: classification performance on the discrete label; only accuracy is enabled. -->
          <operator activated="true" class="performance_classification" compatibility="7.0.001" expanded="true" height="82" name="CP" width="90" x="179" y="34">
            <parameter key="main_criterion" value="first"/>
            <parameter key="accuracy" value="true"/>
            <parameter key="classification_error" value="false"/>
            <parameter key="kappa" value="false"/>
            <parameter key="weighted_mean_recall" value="false"/>
            <parameter key="weighted_mean_precision" value="false"/>
            <parameter key="spearman_rho" value="false"/>
            <parameter key="kendall_tau" value="false"/>
            <parameter key="absolute_error" value="false"/>
            <parameter key="relative_error" value="false"/>
            <parameter key="relative_error_lenient" value="false"/>
            <parameter key="relative_error_strict" value="false"/>
            <parameter key="normalized_absolute_error" value="false"/>
            <parameter key="root_mean_squared_error" value="false"/>
            <parameter key="root_relative_squared_error" value="false"/>
            <parameter key="squared_error" value="false"/>
            <parameter key="correlation" value="false"/>
            <parameter key="squared_correlation" value="false"/>
            <parameter key="cross-entropy" value="false"/>
            <parameter key="margin" value="false"/>
            <parameter key="soft_margin_loss" value="false"/>
            <parameter key="logistic_loss" value="false"/>
            <parameter key="skip_undefined_labels" value="true"/>
            <parameter key="use_example_weights" value="true"/>
            <list key="class_weights"/>
          </operator>
          <!--
            Replaces prediction(leadTime) with its parsed value and sets leadTime from
            leadTime_numeric, so that both columns handed to RP come from these expressions.
          -->
          <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="45" y="187">
            <list key="function_descriptions">
              <parameter key="prediction(leadTime)" value="parse([prediction(leadTime)])"/>
              <parameter key="leadTime" value="[leadTime_numeric]"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <!-- RP: regression performance on the regenerated columns; only absolute_error is enabled. -->
          <operator activated="true" class="performance_regression" compatibility="7.0.001" expanded="true" height="82" name="RP" width="90" x="179" y="187">
            <parameter key="main_criterion" value="first"/>
            <parameter key="root_mean_squared_error" value="false"/>
            <parameter key="absolute_error" value="true"/>
            <parameter key="relative_error" value="false"/>
            <parameter key="relative_error_lenient" value="false"/>
            <parameter key="relative_error_strict" value="false"/>
            <parameter key="normalized_absolute_error" value="false"/>
            <parameter key="root_relative_squared_error" value="false"/>
            <parameter key="squared_error" value="false"/>
            <parameter key="correlation" value="false"/>
            <parameter key="squared_correlation" value="false"/>
            <parameter key="prediction_average" value="false"/>
            <parameter key="spearman_rho" value="false"/>
            <parameter key="kendall_tau" value="false"/>
            <parameter key="skip_undefined_labels" value="true"/>
            <parameter key="use_example_weights" value="true"/>
          </operator>
          <!-- Logs the RP and CP performance values together with the feature names of Loop Subsets. -->
          <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="313" y="187">
            <list key="log">
              <parameter key="rp" value="operator.RP.value.performance"/>
              <parameter key="cp" value="operator.CP.value.performance"/>
              <parameter key="fn" value="operator.Loop Subsets.value.feature_names"/>
            </list>
            <parameter key="sorting_type" value="none"/>
            <parameter key="sorting_k" value="100"/>
            <parameter key="persistent" value="false"/>
          </operator>
          <connect from_port="example set" to_op="X-Prediction" to_port="example set"/>
          <connect from_op="X-Prediction" from_port="labelled data" to_op="CP" to_port="labelled data"/>
          <connect from_op="CP" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="RP" to_port="labelled data"/>
          <connect from_op="RP" from_port="performance" to_op="Log" to_port="through 1"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>
    </process>
  </operator>
</process>
Tagged:

Answers

  • MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,503 RM Data Scientist
    Hi wessel,

    I can't get your process in - something's wrong with the XML.

    Can't you simply use parse numbers on the prediction, swap the label and prediction roles on the right-hand side of x-val, and use the standard performance operator?

    ~Martin
    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
Sign In or Register to comment.