Replace Old Multiple Column Names with New Column Names

ssawantssawant Member Posts: 3 Contributor I
edited May 2020 in Help
I have an example set with many attributes. I want to replace these attribute names with new names that are provided in another example set.
How can we do that in RapidMiner?


Example:
Old_Column_Name  New_Column_Name
a                                 apple
b                                 ball
c                                 cat
Tagged:

Answers

  • hbajpaihbajpai Member Posts: 102 Unicorn
    Hey @ssawant ,

    You can try Rename by Example Values for such a task. I typically use Create ExampleSet (comma separated text) to create a table with the old and new names, and then append it to the data before using the Rename by Example Values operator.
    Best,
    Harshit
  • ssawantssawant Member Posts: 3 Contributor I
    Thanks for your suggestion @hbajpai.
    But, as I am totally new to this, could you help me by creating a dummy process and posting it?



  • hbajpaihbajpai Member Posts: 102 Unicorn
    Sure @ssawant, check out the below XML.
    <?xml version="1.0" encoding="UTF-8"?><process version="9.6.000">
      <!--
        Demo: rename attributes a, b, c to apple, ball, cat.
        A one-row "rename" table (header = old names, row = new names) is put
        on top of the data, then Rename by Example Values turns that first row
        into the attribute names.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Sample data with the old attribute names a, b and c (100 generated examples). -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.6.000" expanded="true" height="68" name="Dummy data" width="90" x="179" y="187">
            <parameter key="generator_type" value="numeric series"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration">
              <parameter key="a" value="linear.0\.0.1\.0"/>
              <parameter key="b" value="quadratic.0\.0.1\.0"/>
              <parameter key="c" value="square root.0\.0.1\.0"/>
            </list>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!--
            Rename table: the header line holds the old names, the data line the new names.
            No spaces after the commas: trim_attribute_names only covers the header,
            so padding in the data line could end up inside the new attribute names.
          -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.6.000" expanded="true" height="68" name="Rename row" width="90" x="179" y="34">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="input_csv_text" value="a,b,c&#10;apple,ball,cat"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- The rename table is wired to "example set 1" so that its row comes first in the merged set. -->
          <operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.3.000" expanded="true" height="103" name="Append (Superset)" width="90" x="380" y="34"/>
          <!-- Row 1 (the new names) becomes the attribute names. -->
          <operator activated="true" class="rename_by_example_values" compatibility="9.6.000" expanded="true" height="82" name="Rename by Example Values" width="90" x="581" y="34">
            <parameter key="row_number" value="1"/>
          </operator>
          <!-- NOTE(review): presumably needed to restore numeric types after the text row was appended; confirm. -->
          <operator activated="true" class="guess_types" compatibility="9.6.000" expanded="true" height="82" name="Guess Types" width="90" x="715" y="34">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="decimal_point_character" value="."/>
          </operator>
          <connect from_op="Dummy data" from_port="output" to_op="Append (Superset)" to_port="example set 2"/>
          <connect from_op="Rename row" from_port="output" to_op="Append (Superset)" to_port="example set 1"/>
          <connect from_op="Append (Superset)" from_port="merged set" to_op="Rename by Example Values" to_port="example set input"/>
          <connect from_op="Rename by Example Values" from_port="example set output" to_op="Guess Types" to_port="example set input"/>
          <connect from_op="Guess Types" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    


    Best,
    Harshit
  • ssawantssawant Member Posts: 3 Contributor I
    edited May 2020
    @hbajpai Thanks for the response, but my scenario is a bit different. Please find the files, process, and expected output below.
  • hbajpaihbajpai Member Posts: 102 Unicorn
    @ssawant I am not sure how it is different for you, as I am able to use the same process to reach the expected output.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.6.000">
      <!--
        Rename the columns of an Excel sheet (PARAMETER_VALUE_1 .. PARAMETER_VALUE_5)
        to att1 .. att5. A one-row "rename" table (header = old names, row = new names)
        is put on top of the data, then Rename by Example Values turns that first row
        into the attribute names.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- The excel_file value is a path on the author's machine; point it at your own workbook before running. -->
          <operator activated="true" class="read_excel" compatibility="9.6.000" expanded="true" height="68" name="Your default file" width="90" x="112" y="85">
            <parameter key="excel_file" value="C:\Users\harsh\Downloads\old_columns.xlsx"/>
            <parameter key="sheet_selection" value="sheet number"/>
            <parameter key="sheet_number" value="1"/>
            <parameter key="imported_cell_range" value="A1"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="first_row_as_names" value="true"/>
            <list key="annotations"/>
            <parameter key="date_format" value=""/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
            <parameter key="read_all_values_as_polynominal" value="false"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="PARAMETER_VALUE_1.true.polynominal.attribute"/>
              <parameter key="1" value="PARAMETER_VALUE_2.true.integer.attribute"/>
              <parameter key="2" value="PARAMETER_VALUE_3.true.polynominal.attribute"/>
              <parameter key="3" value="PARAMETER_VALUE_4.true.polynominal.attribute"/>
              <parameter key="4" value="PARAMETER_VALUE_5.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
          </operator>
          <!--
            Rename table: the header line holds the old names, the data line the new names.
            The data line must not contain stray whitespace (a tab after att1 was removed),
            otherwise it would become part of the new attribute name.
          -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.6.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="246" y="289">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="input_csv_text" value="PARAMETER_VALUE_1,PARAMETER_VALUE_2,PARAMETER_VALUE_3,PARAMETER_VALUE_4,PARAMETER_VALUE_5&#10;att1,att2,att3,att4,att5&#10;"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- The rename table is wired to "example set 1" so that its row comes first in the merged set. -->
          <operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.3.000" expanded="true" height="103" name="Append (Superset)" width="90" x="447" y="85"/>
          <!-- Row 1 (the new names) becomes the attribute names. -->
          <operator activated="true" class="rename_by_example_values" compatibility="9.6.000" expanded="true" height="82" name="Rename by Example Values" width="90" x="648" y="187">
            <parameter key="row_number" value="1"/>
          </operator>
          <!-- NOTE(review): presumably needed to restore the original value types after the text row was appended; confirm. -->
          <operator activated="true" class="guess_types" compatibility="9.6.000" expanded="true" height="82" name="Guess Types" width="90" x="782" y="187">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="decimal_point_character" value="."/>
          </operator>
          <connect from_op="Your default file" from_port="output" to_op="Append (Superset)" to_port="example set 2"/>
          <connect from_op="Create ExampleSet" from_port="output" to_op="Append (Superset)" to_port="example set 1"/>
          <connect from_op="Append (Superset)" from_port="merged set" to_op="Rename by Example Values" to_port="example set input"/>
          <connect from_op="Rename by Example Values" from_port="example set output" to_op="Guess Types" to_port="example set input"/>
          <connect from_op="Guess Types" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    <b></b>


    Best,
    Harshit
Sign In or Register to comment.