Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.

Regression via classification

wesselwessel Member Posts: 537 Maven
edited November 2018 in Help
Hi,

I wish to run a classification algorithm on a regression task.
I generated a new attribute in which the label attribute is now discrete.
Unfortunately, I can no longer straightforwardly apply the Performance (Regression) operator.

I have to compute the absolute error myself using generate attributes.
Since I'm applying attribute selection, I'm doing this over and over again, which is really slow.
Is there a faster way to achieve this result?

Best regards,

Wessel

An example process is attached below:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 7.0.001): regression via classification.
  For every attribute subset produced by "Loop Subsets", the inner chain is
    X-Prediction (k-NN), then CP, then Generate Attributes, then RP, then Log.
  NOTE(review): the operators sit directly under <process> with no enclosing
  root operator; a reply in the thread reports that this XML could not be
  imported. Confirm whether a root operator wrapper is required.
-->
<process version="7.0.001">
  <!-- Loop Subsets: subset size is not fixed (use_exact_number=false); sizes range from 1 up to a capped maximum of 2 attributes. -->
  <operator activated="true" class="loop_attribute_subsets" compatibility="7.0.001" expanded="true" height="68" name="Loop Subsets" width="90" x="179" y="34">
    <parameter key="use_exact_number" value="false"/>
    <parameter key="exact_number_of_attributes" value="-1"/>
    <parameter key="min_number_of_attributes" value="1"/>
    <parameter key="limit_max_number" value="true"/>
    <parameter key="max_number_of_attributes" value="2"/>
    <process expanded="true">
      <!-- X-Prediction: 10 validations with stratified sampling, no leave-one-out. -->
      <!-- use_local_random_seed is false, so local_random_seed=1992 is presumably ignored (verify). -->
      <operator activated="true" class="x_prediction" compatibility="7.0.001" expanded="true" height="68" name="X-Prediction" width="90" x="45" y="34">
        <parameter key="leave_one_out" value="false"/>
        <parameter key="number_of_validations" value="10"/>
        <parameter key="sampling_type" value="stratified sampling"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
        <!-- First subprocess: trains the k-NN learner on the "training" port and hands the model to the "model" sink. -->
        <process expanded="true">
          <!-- k-NN with k=3, unweighted vote, MixedMeasures / MixedEuclideanDistance. -->
          <!-- The remaining measure, divergence and kernel parameters are presumably only relevant for other measure_types (verify). -->
          <operator activated="true" class="k_nn" compatibility="7.0.001" expanded="true" height="82" name="k-NN" width="90" x="45" y="187">
            <parameter key="k" value="3"/>
            <parameter key="weighted_vote" value="false"/>
            <parameter key="measure_types" value="MixedMeasures"/>
            <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
            <parameter key="nominal_measure" value="NominalDistance"/>
            <parameter key="numerical_measure" value="EuclideanDistance"/>
            <parameter key="divergence" value="GeneralizedIDivergence"/>
            <parameter key="kernel_type" value="radial"/>
            <parameter key="kernel_gamma" value="1.0"/>
            <parameter key="kernel_sigma1" value="1.0"/>
            <parameter key="kernel_sigma2" value="0.0"/>
            <parameter key="kernel_sigma3" value="2.0"/>
            <parameter key="kernel_degree" value="3.0"/>
            <parameter key="kernel_shift" value="1.0"/>
            <parameter key="kernel_a" value="1.0"/>
            <parameter key="kernel_b" value="0.0"/>
          </operator>
          <connect from_port="training" to_op="k-NN" to_port="training set"/>
          <connect from_op="k-NN" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Second subprocess: applies the model to the "unlabelled data" port and returns the result on "labelled data". -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="7.0.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="85">
            <list key="application_parameters"/>
            <parameter key="create_view" value="false"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="unlabelled data" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_port="labelled data"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_unlabelled data" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_labelled data" spacing="0"/>
        </process>
      </operator>
      <!-- CP: classification performance; accuracy is the only criterion enabled. -->
      <!-- Its performance output port is not connected below; the Log operator reads the value by operator name. -->
      <operator activated="true" class="performance_classification" compatibility="7.0.001" expanded="true" height="82" name="CP" width="90" x="179" y="34">
        <parameter key="main_criterion" value="first"/>
        <parameter key="accuracy" value="true"/>
        <parameter key="classification_error" value="false"/>
        <parameter key="kappa" value="false"/>
        <parameter key="weighted_mean_recall" value="false"/>
        <parameter key="weighted_mean_precision" value="false"/>
        <parameter key="spearman_rho" value="false"/>
        <parameter key="kendall_tau" value="false"/>
        <parameter key="absolute_error" value="false"/>
        <parameter key="relative_error" value="false"/>
        <parameter key="relative_error_lenient" value="false"/>
        <parameter key="relative_error_strict" value="false"/>
        <parameter key="normalized_absolute_error" value="false"/>
        <parameter key="root_mean_squared_error" value="false"/>
        <parameter key="root_relative_squared_error" value="false"/>
        <parameter key="squared_error" value="false"/>
        <parameter key="correlation" value="false"/>
        <parameter key="squared_correlation" value="false"/>
        <parameter key="cross-entropy" value="false"/>
        <parameter key="margin" value="false"/>
        <parameter key="soft_margin_loss" value="false"/>
        <parameter key="logistic_loss" value="false"/>
        <parameter key="skip_undefined_labels" value="true"/>
        <parameter key="use_example_weights" value="true"/>
        <list key="class_weights"/>
      </operator>
      <!-- Generate Attributes: redefines prediction(leadTime) as parse() of itself and sets leadTime from leadTime_numeric, keeping all other attributes. -->
      <!-- Presumably this turns the discrete label and prediction back into numbers so the regression performance below can be computed (confirm). -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="45" y="187">
        <list key="function_descriptions">
          <parameter key="prediction(leadTime)" value="parse([prediction(leadTime)])"/>
          <parameter key="leadTime" value="[leadTime_numeric]"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <!-- RP: regression performance; absolute_error is the only criterion enabled. -->
      <operator activated="true" class="performance_regression" compatibility="7.0.001" expanded="true" height="82" name="RP" width="90" x="179" y="187">
        <parameter key="main_criterion" value="first"/>
        <parameter key="root_mean_squared_error" value="false"/>
        <parameter key="absolute_error" value="true"/>
        <parameter key="relative_error" value="false"/>
        <parameter key="relative_error_lenient" value="false"/>
        <parameter key="relative_error_strict" value="false"/>
        <parameter key="normalized_absolute_error" value="false"/>
        <parameter key="root_relative_squared_error" value="false"/>
        <parameter key="squared_error" value="false"/>
        <parameter key="correlation" value="false"/>
        <parameter key="squared_correlation" value="false"/>
        <parameter key="prediction_average" value="false"/>
        <parameter key="spearman_rho" value="false"/>
        <parameter key="kendall_tau" value="false"/>
        <parameter key="skip_undefined_labels" value="true"/>
        <parameter key="use_example_weights" value="true"/>
      </operator>
      <!-- Log: records three columns per iteration: rp (RP performance), cp (CP performance) and fn (feature names of the current subset). -->
      <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="313" y="187">
        <list key="log">
          <parameter key="rp" value="operator.RP.value.performance"/>
          <parameter key="cp" value="operator.CP.value.performance"/>
          <parameter key="fn" value="operator.Loop Subsets.value.feature_names"/>
        </list>
        <parameter key="sorting_type" value="none"/>
        <parameter key="sorting_k" value="100"/>
        <parameter key="persistent" value="false"/>
      </operator>
      <!-- Wiring: example set to X-Prediction, to CP, to Generate Attributes, to RP, whose performance goes to Log (port "through 1"). -->
      <connect from_port="example set" to_op="X-Prediction" to_port="example set"/>
      <connect from_op="X-Prediction" from_port="labelled data" to_op="CP" to_port="labelled data"/>
      <connect from_op="CP" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="RP" to_port="labelled data"/>
      <connect from_op="RP" from_port="performance" to_op="Log" to_port="through 1"/>
      <portSpacing port="source_example set" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:

Answers

  • MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,531 RM Data Scientist
    Hi wessel,

    I can't import your process - something's wrong with the XML.

    Can't you simply use parse numbers on the prediction, swap the label and prediction roles on the right-hand side of x-val, and use the standard performance operator?

    ~Martin
    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
Sign In or Register to comment.