Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
Answers
Here's some code that undersamples the frequent class (class=yes):
Paulo Praca
Dortmund, Germany
I could send you my example data if you are interested.
Thanks for your answer,
Paulo Praça
<process version="7.0.001">
  <!--
    RapidMiner process: loads the "Desentupimentos" example set, cleans it,
    splits it 70/30 (stratified), reduces the Obstrucoes = nao rows of the
    70% partition with a Sample operator, trains a decision tree on the
    recombined rows and applies the model to the 30% partition.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">

      <!-- Data loading and role assignment: Obstrucoes becomes the label. -->
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Desentupimentos" width="90" x="45" y="187">
        <parameter key="repository_entry" value="//Local Repository/data/Desentupimentos"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="7.0.001" expanded="true" height="82" name="Set Role" width="90" x="179" y="187">
        <parameter key="attribute_name" value="Obstrucoes"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>

      <!-- Keep only the label and the six listed attributes. -->
      <operator activated="true" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="313" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Obstrucoes|ANO_INSTALACAO|COD_MATERIAL|COMP|SECCAO|SISTEMA|TIPO_REDE"/>
      </operator>

      <!--
        Row filters, chained in this order:
          1. ANO_INSTALACAO must not be missing (expression condition)
          2. SECCAO must differ from 0.0
          3. COD_MATERIAL must differ from NC
          4. COMP must be at least 10 (expression condition)
        NOTE(review): filter 1 also carries a filters_list entry for
        COD_MATERIAL; presumably it is ignored while condition_class is
        "expression" (filter 3 applies the same rule anyway). Confirm.
      -->
      <operator activated="true" class="filter_examples" compatibility="7.0.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="187">
        <parameter key="parameter_expression" value="! ((missing([ANO_INSTALACAO])))"/>
        <parameter key="condition_class" value="expression"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="COD_MATERIAL.does_not_equal.NC"/>
        </list>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="7.0.001" expanded="true" height="103" name="Filter Examples (2)" width="90" x="581" y="187">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="SECCAO.ne.0\.0"/>
        </list>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="7.0.001" expanded="true" height="103" name="Filter Examples (3)" width="90" x="45" y="289">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="COD_MATERIAL.does_not_equal.NC"/>
        </list>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="7.0.001" expanded="true" height="103" name="Filter Examples (4)" width="90" x="179" y="289">
        <parameter key="parameter_expression" value="COMP>=10"/>
        <parameter key="condition_class" value="expression"/>
        <list key="filters_list"/>
      </operator>

      <!--
        Stratified 70/30 split. Partition 1 feeds the training branch below,
        partition 2 goes straight to Apply Model as the scoring set.
      -->
      <operator activated="true" class="split_data" compatibility="7.0.001" expanded="true" height="103" name="Split Data" width="90" x="313" y="289">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.7"/>
          <parameter key="ratio" value="0.3"/>
        </enumeration>
        <parameter key="sampling_type" value="stratified sampling"/>
      </operator>

      <!--
        Training branch: the 70% partition is duplicated, one copy is reduced
        to the Obstrucoes = sim rows, the other to the Obstrucoes = nao rows.
        Only the "nao" copy passes through Sample before both are appended.
        NOTE(review): sample_size 1800 is presumably an absolute row count
        (no sampling mode is set here); confirm against the operator defaults
        and check that at least 1800 "nao" rows survive the filters.
      -->
      <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="103" name="Multiply" width="90" x="45" y="442"/>
      <operator activated="true" class="filter_examples" compatibility="7.0.001" expanded="true" height="103" name="Filter Examples (6)" width="90" x="313" y="646">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="Obstrucoes.equals.sim"/>
        </list>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="7.0.001" expanded="true" height="103" name="Filter Examples (5)" width="90" x="313" y="442">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="Obstrucoes.equals.nao"/>
        </list>
      </operator>
      <operator activated="true" class="sample" compatibility="7.0.001" expanded="true" height="82" name="Sample" width="90" x="514" y="442">
        <parameter key="sample_size" value="1800"/>
        <list key="sample_size_per_class"/>
        <list key="sample_ratio_per_class"/>
        <list key="sample_probability_per_class"/>
      </operator>
      <operator activated="true" class="append" compatibility="7.0.001" expanded="true" height="103" name="Append" width="90" x="648" y="544"/>

      <!-- Model training (Gini criterion, depth limit 8), scoring and evaluation. -->
      <operator activated="true" class="parallel_decision_tree" compatibility="7.0.001" expanded="true" height="82" name="Decision Tree" width="90" x="849" y="544">
        <parameter key="criterion" value="gini_index"/>
        <parameter key="maximal_depth" value="8"/>
        <parameter key="minimal_gain" value="0.001"/>
      </operator>
      <operator activated="true" class="apply_model" compatibility="7.0.001" expanded="true" height="82" name="Apply Model" width="90" x="983" y="238">
        <list key="application_parameters"/>
      </operator>
      <operator activated="true" class="performance" compatibility="7.0.001" expanded="true" height="82" name="Performance" width="90" x="1050" y="544"/>

      <!-- Wiring: preprocessing chain. -->
      <connect from_op="Retrieve Desentupimentos" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Filter Examples (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (3)" from_port="example set output" to_op="Filter Examples (4)" to_port="example set input"/>
      <connect from_op="Filter Examples (4)" from_port="example set output" to_op="Split Data" to_port="example set"/>

      <!-- Wiring: partition 1 to the training branch, partition 2 to scoring. -->
      <connect from_op="Split Data" from_port="partition 1" to_op="Multiply" to_port="input"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Filter Examples (5)" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Filter Examples (6)" to_port="example set input"/>
      <connect from_op="Filter Examples (6)" from_port="example set output" to_op="Append" to_port="example set 2"/>
      <connect from_op="Filter Examples (5)" from_port="example set output" to_op="Sample" to_port="example set input"/>
      <connect from_op="Sample" from_port="example set output" to_op="Append" to_port="example set 1"/>

      <!-- Wiring: train, apply, evaluate. -->
      <connect from_op="Append" from_port="merged set" to_op="Decision Tree" to_port="training set"/>
      <connect from_op="Decision Tree" from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>

      <!-- Process results: 1 = performance vector, 2 = scored example set, 3 = model. -->
      <connect from_op="Apply Model" from_port="model" to_port="result 3"/>
      <connect from_op="Performance" from_port="performance" to_port="result 1"/>
      <connect from_op="Performance" from_port="example set" to_port="result 2"/>

      <!-- Canvas layout metadata. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <background height="219" location="//Samples/Tutorials/Basics/06/tutorial6" width="2000" x="12" y="12"/>
    </process>
  </operator>
</process>
It is cool to see that another civil engineer is working with RM. One of our Sales Engineers is actually a civil engineer as well: Thomas Ott, aka neuralmarkettrends (on Twitter or YouTube).
I think that this problem is indeed a good use case for data mining. I think that the usual points to look at are the algorithm, feature selection and feature generation. Of course you can post data here, and we as the community will have a look at it.
~Martin
Dortmund, Germany