Due to recent updates, all users are required to create an Altair One account to login to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to login to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you login, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
Rename by Replacing - REGEX
Hi All - struggling to utilize regex to rename columns after pivoting. Ideally trying to remove the "count(IDT)_" and leave the year on the header. Any help would be appreciated. XML and sample data attached.
<?xml version="1.0" encoding="UTF-8"?><process version="9.0.002">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="9.0.002" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="read_excel" compatibility="9.0.002" expanded="true" height="68" name="Read Excel (3)" width="90" x="112" y="1207">
<parameter key="excel_file" value="C:\Users\mdueber\OneDrive\OneDrive - JAGUAR LAND ROVER\Analytics Department\RapidMiner Support Files\RM on OneDrive Support\Service Retention\Data files\Car Parc\example2.xlsx"/>
<list key="annotations"/>
<parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
<list key="data_set_meta_data_information">
<parameter key="0" value="IDT.true.polynominal.attribute"/>
<parameter key="1" value="ZIP.true.polynominal.attribute"/>
<parameter key="2" value="COLOR.true.polynominal.attribute"/>
<parameter key="3" value="MODEL.true.polynominal.attribute"/>
<parameter key="4" value="SEGMENT.true.polynominal.attribute"/>
<parameter key="5" value="YEAR.true.integer.attribute"/>
</list>
<parameter key="read_not_matching_values_as_missings" value="false"/>
<description align="center" color="transparent" colored="false" width="126">UIO</description>
</operator>
<operator activated="true" class="append" compatibility="9.0.002" expanded="true" height="82" name="Append (3)" width="90" x="313" y="1207"/>
<operator activated="true" class="filter_examples" compatibility="9.0.002" expanded="true" height="103" name="Filter Examples (4)" width="90" x="447" y="1207">
<list key="filters_list">
<parameter key="filters_entry_key" value="YEAR.ge.2012"/>
</list>
</operator>
<operator activated="true" class="numerical_to_polynominal" compatibility="9.0.002" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="581" y="1156">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="YEAR"/>
</operator>
<operator activated="true" class="aggregate" compatibility="9.0.002" expanded="true" height="82" name="Aggregate (3)" width="90" x="782" y="1258">
<list key="aggregation_attributes">
<parameter key="IDT" value="count"/>
</list>
<parameter key="group_by_attributes" value="IDT|YEAR"/>
</operator>
<operator activated="true" class="pivot" compatibility="9.0.002" expanded="true" height="82" name="Pivot (3)" width="90" x="1050" y="1156">
<parameter key="group_attribute" value="IDT"/>
<parameter key="index_attribute" value="YEAR"/>
<parameter key="consider_weights" value="false"/>
<parameter key="skip_constant_attributes" value="false"/>
</operator>
<operator activated="true" class="concurrency:join" compatibility="9.0.002" expanded="true" height="82" name="Join (2)" width="90" x="1117" y="1258">
<list key="key_attributes">
<parameter key="IDT" value="IDT"/>
</list>
</operator>
<operator activated="true" class="rename_by_replacing" compatibility="9.0.002" expanded="true" height="82" name="Rename by Replacing" width="90" x="1251" y="1207">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="count(IDT)"/>
<parameter key="regular_expression" value=".*count(VIN)"/>
<parameter key="include_special_attributes" value="true"/>
<parameter key="replace_what" value=" count"/>
</operator>
<connect from_op="Read Excel (3)" from_port="output" to_op="Append (3)" to_port="example set 1"/>
<connect from_op="Append (3)" from_port="merged set" to_op="Filter Examples (4)" to_port="example set input"/>
<connect from_op="Filter Examples (4)" from_port="example set output" to_op="Numerical to Polynominal" to_port="example set input"/>
<connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Aggregate (3)" to_port="example set input"/>
<connect from_op="Aggregate (3)" from_port="example set output" to_op="Pivot (3)" to_port="example set input"/>
<connect from_op="Aggregate (3)" from_port="original" to_op="Join (2)" to_port="right"/>
<connect from_op="Pivot (3)" from_port="example set output" to_op="Join (2)" to_port="left"/>
<connect from_op="Join (2)" from_port="join" to_op="Rename by Replacing" to_port="example set input"/>
<connect from_op="Rename by Replacing" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Tagged:
0
Best Answer
-
BalazsBarany Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert Posts: 955 UnicornHi!
In this case, you need the regexp in two places.
attribute filter type: regular_expression
regular expression: count\(IDT\)_.+ (the .+ is necessary because in this context RapidMiner wants to match the entire string)
replace what: count\(IDT\)_ (here it is fine without the .+ because we really just match this part of the string)
Regards,
Balázs1
Answers
The parentheses in count(IDT)_ have a special meaning in regular expressions. This would be the correct one: count\(IDT\)_
Regards,
Balázs
1 - not identifying the attributes
2 - regex still not recognizing the count\(IDT\)_ pattern to replace with nothing.
Any other ideas would be helpful
Yes, RapidMiner requires matching the full string for the attribute selection, not just a part of the string. The .+ after count\(IDT\)_ matches the variable part, in your case the year.
Regards,
Balázs