Options

round real values to integer

phonigsphonigs Member Posts: 3 Newbie
Hey Miners ;)

i have made a process where i am fillling in values in an example set using the impute missing values operator and the kNN-Algorithm. The new Values fit very well to my exmple set, but they are all up to the 3rd place after the comma.

Some values have to bee as an Integer. So is there any operator or function i can use to round my values. i have been trying the Generate Attributes operator, but this wasnt turning my values into roundet one, it generated new values.

Hope this question isnt too common.

regards Niklas

Answers

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,517 RM Data Scientist
    edited January 2021
    Hi,
    please check the attached process. I think the feature you don't know about is, that you can overwrite attributes with Generate Attributes.

    Cheers,
    Martin

    <?xml version="1.0" encoding="UTF-8"?><process version="9.8.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.8.001" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <operator activated="true" class="generate_data" compatibility="9.8.001" expanded="true" height="68" name="Generate Data" width="90" x="246" y="136">
            <parameter key="target_function" value="random"/>
            <parameter key="number_examples" value="100"/>
            <parameter key="number_of_attributes" value="5"/>
            <parameter key="attributes_lower_bound" value="-10.0"/>
            <parameter key="attributes_upper_bound" value="10.0"/>
            <parameter key="gaussian_standard_deviation" value="10.0"/>
            <parameter key="largest_radius" value="10.0"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
          </operator>
          <operator activated="true" class="generate_attributes" compatibility="9.8.001" expanded="true" height="82" name="Generate Attributes" width="90" x="447" y="136">
            <list key="function_descriptions">
              <parameter key="att1" value="if(rand()&gt;0.1,round(att1),MISSING_NUMERIC)"/>
            </list>
            <parameter key="keep_all" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Create Some missings</description>
          </operator>
          <operator activated="true" class="impute_missing_values" compatibility="9.8.001" expanded="true" height="68" name="Impute Missing Values" width="90" x="581" y="136">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="iterate" value="true"/>
            <parameter key="learn_on_complete_cases" value="true"/>
            <parameter key="order" value="chronological"/>
            <parameter key="sort" value="ascending"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <process expanded="true">
              <operator activated="true" class="k_nn" compatibility="9.8.001" expanded="true" height="82" name="k-NN" width="90" x="246" y="34">
                <parameter key="k" value="5"/>
                <parameter key="weighted_vote" value="true"/>
                <parameter key="measure_types" value="MixedMeasures"/>
                <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
                <parameter key="nominal_measure" value="NominalDistance"/>
                <parameter key="numerical_measure" value="EuclideanDistance"/>
                <parameter key="divergence" value="GeneralizedIDivergence"/>
                <parameter key="kernel_type" value="radial"/>
                <parameter key="kernel_gamma" value="1.0"/>
                <parameter key="kernel_sigma1" value="1.0"/>
                <parameter key="kernel_sigma2" value="0.0"/>
                <parameter key="kernel_sigma3" value="2.0"/>
                <parameter key="kernel_degree" value="3.0"/>
                <parameter key="kernel_shift" value="1.0"/>
                <parameter key="kernel_a" value="1.0"/>
                <parameter key="kernel_b" value="0.0"/>
              </operator>
              <connect from_port="example set source" to_op="k-NN" to_port="training set"/>
              <connect from_op="k-NN" from_port="model" to_port="model sink"/>
              <portSpacing port="source_example set source" spacing="0"/>
              <portSpacing port="sink_model sink" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="generate_attributes" compatibility="9.8.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="782" y="136">
            <list key="function_descriptions">
              <parameter key="att1" value="round(att1)"/>
            </list>
            <parameter key="keep_all" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">round everything</description>
          </operator>
          <connect from_op="Generate Data" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Impute Missing Values" to_port="example set in"/>
          <connect from_op="Impute Missing Values" from_port="example set out" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>




    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
Sign In or Register to comment.