RapidMiner 9.7 is Now Available

Lots of amazing new improvements including true version control! Learn more about what's new here.

CLICK HERE TO DOWNLOAD

Replace Old Multiple Column Names with New Column Names

ssawantssawant Member Posts: 3 Contributor I
edited May 27 in Help
I have an example set with many attributes. I want to replace the names of these attributes with new names that are stored in an example set.
How can we do that in RapidMiner?


Example:
Old_Column_Name  New_Column_Name
a                                 apple
b                                 ball
c                                 cat
Tagged:
hbajpai

Answers

  • hbajpaihbajpai Member Posts: 87   Unicorn
    Hey @ssawant ,

    You can try Rename by Example Values for such a task. I typically use Create ExampleSet (comma separated text) to create a table with the old and new names, and then append it to the data before using the Rename by Example Values operator.
    Best,
    Harshit
    mschmitzlionelderkrikorssawant
  • ssawantssawant Member Posts: 3 Contributor I
    Thanks for your suggestion @hbajpai.
    But as I am totally new to this, could you help me by creating a dummy process and posting it?



  • hbajpaihbajpai Member Posts: 87   Unicorn
    Sure @ssawant, check out the below XML.
    <?xml version="1.0" encoding="UTF-8"?><process version="9.6.000">
      <!-- Example process: generates a dummy data set with attributes a, b, c,
           appends it to a one-row table holding the new names, and then renames
           the attributes from the values of row 1. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Stand-in for the real data: 100 examples with three numeric series
               attributes named a, b and c. -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.6.000" expanded="true" height="68" name="Dummy data" width="90" x="179" y="187">
            <parameter key="generator_type" value="numeric series"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration">
              <parameter key="a" value="linear.0\.0.1\.0"/>
              <parameter key="b" value="quadratic.0\.0.1\.0"/>
              <parameter key="c" value="square root.0\.0.1\.0"/>
            </list>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Name mapping built from CSV text: the header line (a, b, c) carries the
               old attribute names, the single data line (apple, ball, cat) the new ones. -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.6.000" expanded="true" height="68" name="Rename row" width="90" x="179" y="34">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="input_csv_text" value="a, b, c&#10;apple, ball, cat"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Operator from the Operator Toolbox extension (class prefix operator_toolbox).
               "Rename row" is wired to "example set 1" below, presumably so that the
               name row ends up as row 1 of the merged set (TODO confirm ordering). -->
          <operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.3.000" expanded="true" height="103" name="Append (Superset)" width="90" x="380" y="34"/>
          <!-- Takes the new attribute names from the values of row number 1. -->
          <operator activated="true" class="rename_by_example_values" compatibility="9.6.000" expanded="true" height="82" name="Rename by Example Values" width="90" x="581" y="34">
            <parameter key="row_number" value="1"/>
          </operator>
          <!-- Re-guesses the value types of all attributes (attribute_filter_type is "all");
               presumably needed because the textual name row changed the column types
               during the append (TODO confirm). -->
          <operator activated="true" class="guess_types" compatibility="9.6.000" expanded="true" height="82" name="Guess Types" width="90" x="715" y="34">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="decimal_point_character" value="."/>
          </operator>
          <!-- Wiring: name row goes to port 1 and data to port 2 of the append, then
               rename, then type guessing, then the process result. -->
          <connect from_op="Dummy data" from_port="output" to_op="Append (Superset)" to_port="example set 2"/>
          <connect from_op="Rename row" from_port="output" to_op="Append (Superset)" to_port="example set 1"/>
          <connect from_op="Append (Superset)" from_port="merged set" to_op="Rename by Example Values" to_port="example set input"/>
          <connect from_op="Rename by Example Values" from_port="example set output" to_op="Guess Types" to_port="example set input"/>
          <connect from_op="Guess Types" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    


    Best,
    Harshit
    ssawant
  • ssawantssawant Member Posts: 3 Contributor I
    edited May 28
    @hbajpai Thanks for the response, but my scenario is a bit different. Please find the files, process and expected output below.
  • hbajpaihbajpai Member Posts: 87   Unicorn
    @ssawant I am not sure how it is different for you, as I am able to use the same process to reach the expected output.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.6.000">
      <!-- Example process: reads the data from an Excel file, appends it to a one-row
           table holding the new names, and then renames the attributes from the
           values of row 1. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Source data. NOTE: excel_file is an absolute path on the author's machine;
               point it at your own copy of old_columns.xlsx before running. -->
          <operator activated="true" class="read_excel" compatibility="9.6.000" expanded="true" height="68" name="Your default file" width="90" x="112" y="85">
            <parameter key="excel_file" value="C:\Users\harsh\Downloads\old_columns.xlsx"/>
            <parameter key="sheet_selection" value="sheet number"/>
            <parameter key="sheet_number" value="1"/>
            <parameter key="imported_cell_range" value="A1"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="first_row_as_names" value="true"/>
            <list key="annotations"/>
            <parameter key="date_format" value=""/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
            <parameter key="read_all_values_as_polynominal" value="false"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="PARAMETER_VALUE_1.true.polynominal.attribute"/>
              <parameter key="1" value="PARAMETER_VALUE_2.true.integer.attribute"/>
              <parameter key="2" value="PARAMETER_VALUE_3.true.polynominal.attribute"/>
              <parameter key="3" value="PARAMETER_VALUE_4.true.polynominal.attribute"/>
              <parameter key="4" value="PARAMETER_VALUE_5.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
          </operator>
          <!-- Name mapping built from CSV text: the header line lists the current
               attribute names (PARAMETER_VALUE_1 to PARAMETER_VALUE_5), the data line the
               new names (att1 to att5). Keep the values free of stray whitespace: only
               attribute names are trimmed (trim_attribute_names), so whitespace inside a
               value would presumably end up in the renamed attribute. -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.6.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="246" y="289">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="input_csv_text" value="PARAMETER_VALUE_1,PARAMETER_VALUE_2,PARAMETER_VALUE_3,PARAMETER_VALUE_4,PARAMETER_VALUE_5&#10;att1,att2,att3,att4,att5&#10;"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Operator from the Operator Toolbox extension (class prefix operator_toolbox).
               The name row is wired to "example set 1" below, presumably so that it ends
               up as row 1 of the merged set (TODO confirm ordering). -->
          <operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.3.000" expanded="true" height="103" name="Append (Superset)" width="90" x="447" y="85"/>
          <!-- Takes the new attribute names from the values of row number 1. -->
          <operator activated="true" class="rename_by_example_values" compatibility="9.6.000" expanded="true" height="82" name="Rename by Example Values" width="90" x="648" y="187">
            <parameter key="row_number" value="1"/>
          </operator>
          <!-- Re-guesses the value types of all attributes (attribute_filter_type is "all");
               presumably needed because the textual name row changed the column types
               during the append (TODO confirm). -->
          <operator activated="true" class="guess_types" compatibility="9.6.000" expanded="true" height="82" name="Guess Types" width="90" x="782" y="187">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="decimal_point_character" value="."/>
          </operator>
          <!-- Wiring: name row goes to port 1 and Excel data to port 2 of the append, then
               rename, then type guessing, then the process result. -->
          <connect from_op="Your default file" from_port="output" to_op="Append (Superset)" to_port="example set 2"/>
          <connect from_op="Create ExampleSet" from_port="output" to_op="Append (Superset)" to_port="example set 1"/>
          <connect from_op="Append (Superset)" from_port="merged set" to_op="Rename by Example Values" to_port="example set input"/>
          <connect from_op="Rename by Example Values" from_port="example set output" to_op="Guess Types" to_port="example set input"/>
          <connect from_op="Guess Types" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    


    Best,
    Harshit
Sign In or Register to comment.