Options

Rows to Column

MSRMSR Member Posts: 3 Contributor I
edited December 2018 in Help

Hi,

I believe this is something basic but I haven't found the answer in the forums. Some similar things, but they haven't worked for me. I am trying to convert some rows into attributes. I have som data that looks like this:

 

Name 1

attr1

attr2

attr3

Name 2

attr1

attr2

attr3

 

All in the same column. I want to transpose or pivot into this:

 

Name 1 attr1 attr2 attr3

Name 2 attr1 attr2 attr3

 

I've been trying for hours. Please help. Thanks.

Answers

  • Options
    lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn

    Hi @MSR,

     

    Can you share your real dataset ?

     

    Regards,

     

    Lionel

  • Options
    MSRMSR Member Posts: 3 Contributor I

    Is this what you need?

    [Edited]

    Thanks

  • Options
    lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn

    Hi again @MSR,

     

    Can you share your data, a priori in your case, the files Data1.csv and Data2.csv ?

     

    Regards,

     

    Lionel

  • Options
    MSRMSR Member Posts: 3 Contributor I

    Hi,

     

    I modified the program and files for privacy. Here's the new code:

     

     

    <?xml version="1.0" encoding="UTF-8"?><process version="9.0.002">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="9.0.002" expanded="true" name="Process">
    <parameter key="logverbosity" value="all"/>
    <process expanded="true">
    <operator activated="true" class="read_csv" compatibility="9.0.002" expanded="true" height="68" name="Read CSV" width="90" x="45" y="34">
    <parameter key="csv_file" value="D:\DOCUMENTS\WMU\2018\Fall 2018\IEE 5170\HW\HW01\Data1.csv"/>
    <parameter key="skip_comments" value="true"/>
    <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
    <list key="annotations"/>
    <parameter key="encoding" value="windows-1252"/>
    <list key="data_set_meta_data_information">
    <parameter key="0" value="Section .true.polynominal.attribute"/>
    </list>
    <parameter key="read_not_matching_values_as_missings" value="false"/>
    </operator>
    <operator activated="true" class="filter_examples" compatibility="9.0.002" expanded="true" height="103" name="Filter Examples (2)" width="90" x="179" y="34">
    <parameter key="invert_filter" value="true"/>
    <list key="filters_list">
    <parameter key="filters_entry_key" value="Section.ends_with.0"/>
    </list>
    <parameter key="filters_logic_and" value="false"/>
    </operator>
    <operator activated="true" class="read_csv" compatibility="9.0.002" expanded="true" height="68" name="Read CSV (2)" width="90" x="45" y="187">
    <parameter key="csv_file" value="D:\DOCUMENTS\WMU\2018\Fall 2018\IEE 5170\HW\HW01\Data2.csv"/>
    <parameter key="skip_comments" value="true"/>
    <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
    <list key="annotations"/>
    <parameter key="encoding" value="windows-1252"/>
    <list key="data_set_meta_data_information">
    <parameter key="0" value="Section .true.polynominal.attribute"/>
    </list>
    <parameter key="read_not_matching_values_as_missings" value="false"/>
    </operator>
    <operator activated="true" class="filter_examples" compatibility="9.0.002" expanded="true" height="103" name="Filter Examples (4)" width="90" x="179" y="187">
    <parameter key="invert_filter" value="true"/>
    <list key="filters_list">
    <parameter key="filters_entry_key" value="Section.ends_with.0"/>
    </list>
    <parameter key="filters_logic_and" value="false"/>
    </operator>
    <operator activated="true" class="append" compatibility="9.0.002" expanded="true" height="103" name="Append" width="90" x="313" y="34"/>
    <operator activated="true" class="replace" compatibility="9.0.002" expanded="true" height="82" name="Replace" width="90" x="447" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Section"/>
    <parameter key="replace_what" value="[?,]"/>
    </operator>
    <operator activated="true" class="replace" compatibility="9.0.002" expanded="true" height="82" name="Replace (2)" width="90" x="581" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Section"/>
    <parameter key="replace_what" value="UnChecked1"/>
    </operator>
    <operator activated="true" class="replace" compatibility="9.0.002" expanded="true" height="82" name="Replace (3)" width="90" x="715" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Section"/>
    <parameter key="replace_what" value="MCWhich sport do you prefer to watch"/>
    </operator>
    <operator activated="true" class="replace" compatibility="9.0.002" expanded="true" height="82" name="Replace (4)" width="90" x="849" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Section"/>
    <parameter key="replace_what" value="MCWhat would be your preferred pizza"/>
    </operator>
    <operator activated="true" class="replace" compatibility="9.0.002" expanded="true" height="82" name="Replace (5)" width="90" x="983" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Section"/>
    <parameter key="replace_what" value="MCWhat music do you prefer"/>
    </operator>
    <operator activated="true" class="replace" compatibility="9.0.002" expanded="true" height="82" name="Replace (6)" width="90" x="1117" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Section"/>
    <parameter key="replace_what" value="MCWhat is your highest completed level of education"/>
    </operator>
    <operator activated="true" class="replace" compatibility="9.0.002" expanded="true" height="82" name="Replace (7)" width="90" x="1251" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Section"/>
    <parameter key="replace_what" value="MCMarital Status"/>
    </operator>
    <operator activated="true" class="replace" compatibility="9.0.002" expanded="true" height="82" name="Replace (8)" width="90" x="1385" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Section"/>
    <parameter key="replace_what" value="SAWhat year were you born"/>
    </operator>
    <operator activated="true" class="replace" compatibility="9.0.002" expanded="true" height="82" name="Replace (9)" width="90" x="1519" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Section"/>
    <parameter key="replace_what" value="SANumber of siblings"/>
    </operator>
    <connect from_op="Read CSV" from_port="output" to_op="Filter Examples (2)" to_port="example set input"/>
    <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Append" to_port="example set 1"/>
    <connect from_op="Read CSV (2)" from_port="output" to_op="Filter Examples (4)" to_port="example set input"/>
    <connect from_op="Filter Examples (4)" from_port="example set output" to_op="Append" to_port="example set 2"/>
    <connect from_op="Append" from_port="merged set" to_op="Replace" to_port="example set input"/>
    <connect from_op="Replace" from_port="example set output" to_op="Replace (2)" to_port="example set input"/>
    <connect from_op="Replace (2)" from_port="example set output" to_op="Replace (3)" to_port="example set input"/>
    <connect from_op="Replace (3)" from_port="example set output" to_op="Replace (4)" to_port="example set input"/>
    <connect from_op="Replace (4)" from_port="example set output" to_op="Replace (5)" to_port="example set input"/>
    <connect from_op="Replace (5)" from_port="example set output" to_op="Replace (6)" to_port="example set input"/>
    <connect from_op="Replace (6)" from_port="example set output" to_op="Replace (7)" to_port="example set input"/>
    <connect from_op="Replace (7)" from_port="example set output" to_op="Replace (8)" to_port="example set input"/>
    <connect from_op="Replace (8)" from_port="example set output" to_op="Replace (9)" to_port="example set input"/>
    <connect from_op="Replace (9)" from_port="example set output" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    </process>

     

     

  • Options
    lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,195 Unicorn

    @MSR,

     

    It's far-fetched but it works : 

    rows_to_columns.pngloo

     

    To execute this process you have to : 

     - Install the Value Series extension from the MarketPlace.

     - Adapt the process according to the name of your "attributes" (they were "?" in the datas you provided).

     

    The process : 

    <?xml version="1.0" encoding="UTF-8"?><process version="9.0.002">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="9.0.002" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="operator_toolbox:create_exampleset" compatibility="1.4.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="112" y="34">
    <parameter key="generator_type" value="comma_separated_text"/>
    <list key="function_descriptions"/>
    <list key="numeric_series_configuration"/>
    <list key="date_series_configuration"/>
    <list key="date_series_configuration (interval)"/>
    <parameter key="input_csv_text" value="Section&#10;sophia&#10;att_1&#10;att_2&#10;att_3&#10;att_4&#10;att_5&#10;att_6&#10;Jacob&#10;att_1&#10;att_2&#10;att_3&#10;att_4&#10;att_5&#10;att_6&#10;Ethan&#10;att_1&#10;att_2&#10;att_3&#10;att_4&#10;att_5&#10;att_6&#10;"/>
    </operator>
    <operator activated="true" class="series:lag_series" compatibility="7.4.000" expanded="true" height="82" name="Lag Series" width="90" x="246" y="34">
    <list key="attributes">
    <parameter key="Section" value="1"/>
    </list>
    </operator>
    <operator activated="true" class="filter_example_range" compatibility="9.0.002" expanded="true" height="82" name="Filter Example Range" width="90" x="380" y="34">
    <parameter key="first_example" value="1"/>
    <parameter key="last_example" value="1"/>
    <parameter key="invert_filter" value="true"/>
    </operator>
    <operator activated="true" class="filter_examples" compatibility="9.0.002" expanded="true" height="103" name="Filter Examples" width="90" x="514" y="34">
    <parameter key="parameter_string" value="Section-1 = att_6"/>
    <parameter key="condition_class" value="attribute_value_filter"/>
    <parameter key="invert_filter" value="true"/>
    <list key="filters_list"/>
    </operator>
    <operator activated="true" class="replace" compatibility="9.0.002" expanded="true" height="82" name="Replace" width="90" x="648" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="Section-1"/>
    <parameter key="replace_what" value="att_."/>
    </operator>
    <operator activated="true" class="series:replace_missing_series_values" compatibility="7.4.000" expanded="true" height="82" name="Replace Missing Values (Series)" width="90" x="782" y="34">
    <parameter key="attribute_name" value="Section-1"/>
    </operator>
    <operator activated="true" class="generate_attributes" compatibility="9.0.002" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="916" y="34">
    <list key="function_descriptions">
    <parameter key="value" value="1"/>
    </list>
    </operator>
    <operator activated="true" class="aggregate" compatibility="9.0.002" expanded="true" height="82" name="Aggregate" width="90" x="1050" y="34">
    <list key="aggregation_attributes">
    <parameter key="Section" value="concatenation"/>
    </list>
    <parameter key="group_by_attributes" value="Section-1"/>
    </operator>
    <operator activated="true" class="split" compatibility="9.0.002" expanded="true" height="82" name="Split" width="90" x="1184" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="concat(Section)"/>
    <parameter key="split_pattern" value="[|]"/>
    </operator>
    <connect from_op="Create ExampleSet" from_port="output" to_op="Lag Series" to_port="example set input"/>
    <connect from_op="Lag Series" from_port="example set output" to_op="Filter Example Range" to_port="example set input"/>
    <connect from_op="Filter Example Range" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_op="Replace" to_port="example set input"/>
    <connect from_op="Replace" from_port="example set output" to_op="Replace Missing Values (Series)" to_port="example set input"/>
    <connect from_op="Replace Missing Values (Series)" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
    <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
    <connect from_op="Aggregate" from_port="example set output" to_op="Split" to_port="example set input"/>
    <connect from_op="Split" from_port="example set output" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    </process>

    I hope it helps,

     

    Regards,

     

    Lionel

  • Options
    Telcontar120Telcontar120 Moderator, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,635 Unicorn

    I think there is an easier solution here, although as is so often the case with RapidMiner, there are probably multiple ways to do this.

    Basically what you want to do is to De-Pivot the data, but your current structure lacks an index value.  So we just create an index value using some MOD arithmetic to make sure that the appropriate groups get created based on the number of rows that need to be grouped together.

    Here is a process that will do that same thing--you will simply need to drop the first subprocess (which simply creates an appropriate dataset) and swap it for your Read CSV, and then number of rows for the index calculation in the Generate Attributes (and remember that java counting begins with zero).

    <?xml version="1.0" encoding="UTF-8"?><process version="9.0.002">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="9.0.002" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="false" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read CSV" width="90" x="179" y="136">
    <parameter key="csv_file" value="C:\Users\brian\Downloads\sample.txt"/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations"/>
    <list key="data_set_meta_data_information"/>
    </operator>
    <operator activated="true" class="subprocess" compatibility="9.0.002" expanded="true" height="82" name="Subprocess" width="90" x="45" y="34">
    <process expanded="true">
    <operator activated="true" class="text:create_document" compatibility="8.1.000" expanded="true" height="68" name="Create Document" width="90" x="45" y="34">
    <parameter key="text" value="Time&#10;Location&#10;Incident&#10;Oct 25th&#10;Tampa&#10;Robbery&#10;Oct 25th&#10;Miami&#10;Theft&#10;Oct 26th&#10;Brandon&#10;Assault&#10;"/>
    </operator>
    <operator activated="true" class="text:documents_to_data" compatibility="8.1.000" expanded="true" height="82" name="Documents to Data" width="90" x="179" y="34">
    <parameter key="text_attribute" value="text"/>
    </operator>
    <operator activated="true" class="split" compatibility="9.0.002" expanded="true" height="82" name="Split" width="90" x="313" y="34">
    <parameter key="split_pattern" value="\s"/>
    </operator>
    <operator activated="true" class="transpose" compatibility="9.0.002" expanded="true" height="82" name="Transpose" width="90" x="447" y="34"/>
    <operator activated="true" class="text_to_nominal" compatibility="9.0.002" expanded="true" height="82" name="Text to Nominal" width="90" x="581" y="34">
    <parameter key="attribute" value="att_1"/>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="9.0.002" expanded="true" height="82" name="Select Attributes" width="90" x="715" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="att_1"/>
    <parameter key="include_special_attributes" value="true"/>
    </operator>
    <connect from_op="Create Document" from_port="output" to_op="Documents to Data" to_port="documents 1"/>
    <connect from_op="Documents to Data" from_port="example set" to_op="Split" to_port="example set input"/>
    <connect from_op="Split" from_port="example set output" to_op="Transpose" to_port="example set input"/>
    <connect from_op="Transpose" from_port="example set output" to_op="Text to Nominal" to_port="example set input"/>
    <connect from_op="Text to Nominal" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    <description align="center" color="transparent" colored="false" width="126">This contains the type of data which this works on, with each attribute contained in a separate row but cycling through the same attributes in order.</description>
    </operator>
    <operator activated="true" class="generate_id" compatibility="9.0.002" expanded="true" height="82" name="Generate ID" width="90" x="179" y="34"/>
    <operator activated="true" class="generate_attributes" compatibility="9.0.002" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="34">
    <list key="function_descriptions">
    <parameter key="index" value="mod(id,3)"/>
    <parameter key="example" value="floor((id-1)/3)"/>
    </list>
    </operator>
    <operator activated="true" class="pivot" compatibility="9.0.002" expanded="true" height="82" name="Pivot" width="90" x="447" y="34">
    <parameter key="group_attribute" value="example"/>
    <parameter key="index_attribute" value="index"/>
    </operator>
    <operator activated="true" class="rename_by_example_values" compatibility="9.0.002" expanded="true" height="82" name="Rename by Example Values" width="90" x="581" y="34">
    <description align="center" color="transparent" colored="false" width="126">Used if the names of the attributes are in the first set of examples.</description>
    </operator>
    <operator activated="true" class="rename" compatibility="9.0.002" expanded="true" height="82" name="Rename" width="90" x="715" y="34">
    <parameter key="old_name" value="0.0"/>
    <parameter key="new_name" value="Example"/>
    <list key="rename_additional_attributes"/>
    <description align="center" color="transparent" colored="false" width="126">Used to rename attributes manually if needed.</description>
    </operator>
    <operator activated="true" class="set_role" compatibility="9.0.002" expanded="true" height="82" name="Set Role" width="90" x="849" y="34">
    <parameter key="attribute_name" value="Example"/>
    <parameter key="target_role" value="id"/>
    <list key="set_additional_roles"/>
    </operator>
    <connect from_op="Subprocess" from_port="out 1" to_op="Generate ID" to_port="example set input"/>
    <connect from_op="Generate ID" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="Pivot" to_port="example set input"/>
    <connect from_op="Pivot" from_port="example set output" to_op="Rename by Example Values" to_port="example set input"/>
    <connect from_op="Rename by Example Values" from_port="example set output" to_op="Rename" to_port="example set input"/>
    <connect from_op="Rename" from_port="example set output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    </process>
    </operator>
    </process>
    Brian T.
    Lindon Ventures 
    Data Science Consulting from Certified RapidMiner Experts
Sign In or Register to comment.