Quick way to extract all attribute names and write to a document?

dangdang Member Posts: 11 Contributor II
edited November 2018 in Help

Dear Community,

 

I want to de-pivot a wide table whose attribute names look like this:

Pages[0].Item.Version

Pages[0].Item.Language

Pages[0].Url.Path

Pages[0].VisitPageIndex

Pages[0].Duration

.

.

.

Pages[1].Item.Version

Pages[1].Item.Language

Pages[1].Url.Path

Pages[1].VisitPageIndex

Pages[1].Duration

 

I used a regex in 'Select Attributes' to filter the wanted attributes, and then I de-pivoted the table. But I feel it would be more convenient if I could copy all the attribute names from a flat document...

 

Thanks in advance!

 

 

 

Best Answer

  • awchisholm RapidMiner Certified Expert, Member Posts: 458 Unicorn
    Solution Accepted

    I created a process, using Groovy, that makes a new example set containing the names and types of an existing example set. Here's a link.

     

    and the XML is here...

     

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="7.0.001">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
    <!-- Demo input: builds an example set from the expressions listed under
         "attribute_values" (real, integer, string, boolean, date and text values).
         "set_additional_roles" then assigns the special roles label, id, cluster,
         prediction, weight and batch to the attributes named there; attributes not
         listed there are presumably left as regular attributes. Replace this
         operator with your own data source to inspect a real table. -->
    <operator activated="true" class="generate_data_user_specification" compatibility="7.0.001" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="112" y="34">
    <list key="attribute_values">
    <parameter key="label_real" value="1.1"/>
    <parameter key="regular_real" value="1.1"/>
    <parameter key="regular_integer" value="1"/>
    <parameter key="regular_string" value="&quot;hello&quot;"/>
    <parameter key="regular_boolean" value="true"/>
    <parameter key="regular_date" value="date_now()"/>
    <parameter key="regular_text" value="&quot;world&quot;"/>
    <parameter key="id_integer" value="1"/>
    <parameter key="cluster_integer" value="1"/>
    <parameter key="prediction_integer" value="1"/>
    <parameter key="weight_integer" value="1"/>
    <parameter key="batch_integer" value="1"/>
    </list>
    <list key="set_additional_roles">
    <parameter key="label_real" value="label"/>
    <parameter key="id_integer" value="id"/>
    <parameter key="cluster_integer" value="cluster"/>
    <parameter key="prediction_integer" value="prediction"/>
    <parameter key="weight_integer" value="weight"/>
    <parameter key="batch_integer" value="batch"/>
    </list>
    </operator>
    <!-- Applies Nominal to Text to the single-attribute subset "regular_text",
         presumably so that the demo data also contains a text-typed attribute
         for the script to report. -->
    <operator activated="true" class="nominal_to_text" compatibility="7.0.001" expanded="true" height="82" name="Nominal to Text" width="90" x="313" y="34">
    <parameter key="attribute_filter_type" value="subset"/>
    <parameter key="attributes" value="regular_text"/>
    </operator>
    <!-- Groovy script that lists the meta data of the example set arriving at
         "input 1". Output 1 passes the input through unchanged; output 2 is a new
         example set with one row per attribute and the string columns Name, Role
         and Type. Type holds the readable value type name rather than the integer
         ontology code. -->
    <operator activated="true" class="execute_script" compatibility="7.0.001" expanded="true" height="103" name="Execute Script" width="90" x="581" y="34">
    <parameter key="script" value="import com.rapidminer.tools.Ontology;&#10;&#10;// Input: the example set whose attribute meta data is listed.&#10;ExampleSet exampleSet0 = input[0];&#10;&#10;// Layout of the meta data table: one row per attribute of the input.&#10;Attribute[] attributes = new Attribute[3];&#10;attributes[0] = AttributeFactory.createAttribute(&quot;Name&quot;, Ontology.STRING);&#10;attributes[1] = AttributeFactory.createAttribute(&quot;Role&quot;, Ontology.STRING);&#10;attributes[2] = AttributeFactory.createAttribute(&quot;Type&quot;, Ontology.STRING);&#10;MemoryExampleTable table = new MemoryExampleTable(attributes);&#10;DataRowFactory ROW_FACTORY = new DataRowFactory(0);&#10;&#10;// allAttributes() covers regular and special attributes alike.&#10;for (Attribute attribute : exampleSet0.getAttributes().allAttributes()) {&#10;&#9;// getValueType() returns an integer ontology code; map it to its readable&#10;&#9;// name and fall back to the numeric code if no name is registered.&#10;&#9;int typeCode = attribute.getValueType();&#10;&#9;String valueType = Ontology.ATTRIBUTE_VALUE_TYPE.mapIndex(typeCode) ?: String.valueOf(typeCode);&#10;&#9;operator.logNote (&quot;Type: &quot; + valueType);&#10;&#9;// Special attributes report their special name; all others are regular.&#10;&#9;AttributeRole currentRole = exampleSet0.getAttributes().getRole(attribute);&#10;&#9;String role = currentRole.isSpecial() ? currentRole.getSpecialName() : &quot;regular&quot;;&#10;&#9;String[] values = new String[3];&#10;&#9;values[0] = attribute.getName();&#10;&#9;values[1] = role;&#10;&#9;values[2] = valueType;&#10;&#9;DataRow row = ROW_FACTORY.create(values, attributes);&#10;&#9;table.addDataRow(row);&#10;}&#10;// the first output is the input&#10;// the second output is the meta data for the input&#10;ExampleSet exampleSet = table.createExampleSet();&#10;ExampleSet[] exampleSets = new ExampleSet[2];&#10;exampleSets[0] = exampleSet0;&#10;exampleSets[1] = exampleSet;&#10;&#10;return exampleSets;"/>
    </operator>
    <connect from_op="Generate Data by User Specification" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
    <connect from_op="Nominal to Text" from_port="example set output" to_op="Execute Script" to_port="input 1"/>
    <connect from_op="Execute Script" from_port="output 1" to_port="result 1"/>
    <connect from_op="Execute Script" from_port="output 2" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    </process>
    </operator>
    </process>

     

     

     

     

Sign In or Register to comment.