RapidMiner 9.7 is Now Available

Lots of amazing new improvements including true version control! Learn more about what's new here.

CLICK HERE TO DOWNLOAD

"Substantial bug in scoring"

stereotaxon Member Posts: 10 Contributor II
edited May 2019 in Help
Hello,

I've come across a pretty big bug. I build a model and then I read in a new dataset and try to score it.
The numeric attribute information stays the same, but the names I use as labels are written incorrectly. Is there
a fix or workaround for this problem?  I'm using WinXP and RapidMiner Community 4.2.0000.

Thanks, Mike



======= debug1.csv (TRAIN) =========

name,var1,var2,var3
Jimi,0.352612363,0.590121045,0.564992742
Janis,0.922569485,0.790112692,0.00504262
Bob,0.766240589,0.908079931,0.734902274
Peter,0.460154945,0.464329674,0.686559339
Paul,0.393046641,0.393054941,0.910596227
Mary,0.322384817,0.403900951,0.176867868
Joni,0.466668921,0.366803665,0.230654245

======= debug2.csv (SCORE) =========
* SAME NAME AS IN TRAINING DATASET
name,var1,var2,var3
Buddy,,0.576962167,0.318579208
Ringo,,0.571770059,0.977731173
Peter,,0.464329674,0.686559339 *
Paul,,0.393054941,0.910596227  *
Mary,,0.403900951,0.176867868  *
Joni,,0.366803665,0.230654245  *
John,,0.565080214,0.509177042
Jimi,,0.590121045,0.564992742  *
Jim,,0.864104026,0.839055131
Janis,,0.790112692,0.00504262  *
Bob,,0.908079931,0.734902274  *

======= SCORING OUTPUT =============
ROW NAME VAR1 PREDICTION(VAR1) VAR2 VAR3
1 Buddy NaN 0.506006992847086 0.577 0.319
2 Ringo NaN 0.5026999432485151 0.572 0.978
3 Paul NaN 0.4312676719193833 0.464 0.687
4 Mary NaN 0.38430756761967644 0.393 0.911
5 Joni NaN 0.39158307673653237 0.404 0.177
6 John NaN 0.3671109097071076 0.367 0.231<< *** ATTRIBUTES ARE OK
7 John NaN 0.4980700738105158 0.565 0.509<< *** BUT THE NAMES ARE WRONG!
8 Buddy NaN 0.5146053218033704 0.59 0.565
9 Jim NaN 0.8006220695941839 0.864 0.839
10 Ringo NaN 0.7430405001131846 0.79 0.0050
11 Peter NaN 0.8348597595558593 0.908 0.735


<operator name="Root" class="Process" expanded="yes">
    <!-- Reproduction process from the bug report. It has two chains:
         "Model" trains a model and writes it to disk, "Score" loads that
         model file again and applies it to a second data set. -->
    <operator name="Model" class="OperatorChain" expanded="yes">
        <!-- Training input c:\debug1.csv, with id_column=1 and label_column=2.
             Presumably 1-based, i.e. "name" is the id and "var1" the label in
             the posted CSV; confirm. A breakpoint is set after this operator. -->
        <operator name="CSVExampleSource" class="CSVExampleSource" breakpoints="after">
            <parameter key="filename" value="c:\debug1.csv"/>
            <parameter key="id_column" value="1"/>
            <parameter key="label_column" value="2"/>
        </operator>
        <!-- Deactivated (activated="no"): alternative Excel input with the
             same id/label column settings as the CSV source above. -->
        <operator name="ExcelExampleSource" class="ExcelExampleSource" breakpoints="after" activated="no">
            <parameter key="excel_file" value="c:\debug1.xls"/>
            <parameter key="first_row_as_names" value="true"/>
            <parameter key="id_column" value="1"/>
            <parameter key="label_column" value="2"/>
        </operator>
        <!-- Learner of class W-M5P. keep_example_set=true, presumably so the
             example set is still available to the ModelApplier that follows. -->
        <operator name="W-M5P" class="W-M5P">
            <parameter key="keep_example_set" value="true"/>
        </operator>
        <!-- Applies the model within the training chain. keep_model=true,
             presumably so the ModelWriter below still receives the model. -->
        <operator name="ModelApplier" class="ModelApplier">
            <list key="application_parameters">
            </list>
            <parameter key="keep_model" value="true"/>
        </operator>
        <!-- Writes the model to c:\debug.mod with output_type XML. -->
        <operator name="ModelWriter" class="ModelWriter">
            <parameter key="model_file" value="c:\debug.mod"/>
            <parameter key="output_type" value="XML"/>
        </operator>
        <!-- The two consumers take the ExampleSet and the Model, presumably so
             that nothing from the training chain is handed on to "Score". -->
        <operator name="IOConsumer" class="IOConsumer">
            <parameter key="io_object" value="ExampleSet"/>
        </operator>
        <operator name="IOConsumer (2)" class="IOConsumer">
            <parameter key="io_object" value="Model"/>
        </operator>
    </operator>
    <operator name="Score" class="OperatorChain" expanded="yes">
        <!-- Deactivated (activated="no"): CSV input for the scoring data. -->
        <operator name="CSVExampleSource (2)" class="CSVExampleSource" breakpoints="after" activated="no">
            <parameter key="filename" value="c:\debug2.csv"/>
            <parameter key="id_column" value="1"/>
            <parameter key="label_column" value="2"/>
        </operator>
        <!-- Active scoring input is the Excel file c:\debug2.xls, whereas the
             "Model" chain reads its data from CSV.
             NOTE(review): the post shows debug2.csv as the scoring data;
             confirm that the .xls file holds the same rows. -->
        <operator name="ExcelExampleSource (2)" class="ExcelExampleSource">
            <parameter key="excel_file" value="c:\debug2.xls"/>
            <parameter key="first_row_as_names" value="true"/>
            <parameter key="id_column" value="1"/>
            <parameter key="label_column" value="2"/>
        </operator>
        <!-- Loads c:\debug.mod, the same path the ModelWriter above writes to. -->
        <operator name="ModelLoader" class="ModelLoader">
            <parameter key="model_file" value="c:\debug.mod"/>
        </operator>
        <!-- Applies the loaded model to the scoring data. According to the
             report, the id names in the output are wrong after this point. -->
        <operator name="ModelApplier (2)" class="ModelApplier">
            <list key="application_parameters">
            </list>
        </operator>
        <!-- Writes the result to c:\debug3.csv using "," as column separator.
             A breakpoint is set after this operator. -->
        <operator name="CSVExampleSetWriter" class="CSVExampleSetWriter" breakpoints="after">
            <parameter key="column_separator" value=","/>
            <parameter key="csv_file" value="c:\debug3.csv"/>
        </operator>
    </operator>
</operator>
Tagged:

Answers

  • steffen Member Posts: 347  Maven
    Hello

    uh, this is really not nice. Another "Nominal Mapping sealed its own doom"-error I suppose...

    here is a workaround:
    I simply create a new id, apply the model and then restore the old id. Since "ExampleSetJoin" is used, your set should not be that large...
    <operator name="Root" class="Process" expanded="yes">
        <!-- Workaround process: the "Model" chain matches the original report
             (CSV input only, relative file paths). The "Score" chain replaces
             the original id by a generated one before the model is applied
             and restores the original id afterwards. -->
        <operator name="Model" class="OperatorChain" expanded="yes">
            <!-- Training input debug1.csv with id_column=1 and label_column=2. -->
            <operator name="CSVExampleSource" class="CSVExampleSource">
                <parameter key="filename" value="debug1.csv"/>
                <parameter key="id_column" value="1"/>
                <parameter key="label_column" value="2"/>
            </operator>
            <!-- Learner of class W-M5P; the example set is kept. -->
            <operator name="W-M5P" class="W-M5P">
                <parameter key="keep_example_set" value="true"/>
            </operator>
            <!-- Applies the model within the training chain; the model is kept. -->
            <operator name="ModelApplier" class="ModelApplier">
                <list key="application_parameters">
                </list>
                <parameter key="keep_model" value="true"/>
            </operator>
            <!-- Writes the model to debug.mod with output_type XML. -->
            <operator name="ModelWriter" class="ModelWriter">
                <parameter key="model_file" value="debug.mod"/>
                <parameter key="output_type" value="XML"/>
            </operator>
            <!-- The two consumers take the ExampleSet and the Model, presumably
                 so that nothing from this chain is handed on to "Score". -->
            <operator name="IOConsumer" class="IOConsumer">
                <parameter key="io_object" value="ExampleSet"/>
            </operator>
            <operator name="IOConsumer (2)" class="IOConsumer">
                <parameter key="io_object" value="Model"/>
            </operator>
        </operator>
        <operator name="Score" class="OperatorChain" expanded="yes">
            <!-- Scoring input debug2.csv with the same id/label column settings. -->
            <operator name="CSVExampleSource (2)" class="CSVExampleSource">
                <parameter key="filename" value="debug2.csv"/>
                <parameter key="id_column" value="1"/>
                <parameter key="label_column" value="2"/>
            </operator>
            <!-- Loads debug.mod, the file written by the ModelWriter above. -->
            <operator name="ModelLoader" class="ModelLoader">
                <parameter key="model_file" value="debug.mod"/>
            </operator>
            <!-- Step 1: change the role of attribute "name" (the original id)
                 to "ignore". -->
            <operator name="ChangeAttributeRole" class="ChangeAttributeRole">
                <parameter key="name" value="name"/>
                <parameter key="target_role" value="ignore"/>
            </operator>
            <!-- Step 2: create the new id (per the post: "I simply create a new id"). -->
            <operator name="IdTagging" class="IdTagging">
            </operator>
            <!-- Step 3: multiply the ExampleSet, presumably yielding two copies:
                 one to remember the id/name pairs and one to score. -->
            <operator name="IOMultiplier" class="IOMultiplier">
                <parameter key="io_object" value="ExampleSet"/>
            </operator>
            <operator name="reduce_id_saver_set" class="OperatorChain" expanded="yes">
                <!-- Skips every feature (pattern ".*") except those matching
                     "id||name", special features included; judging by the
                     operator name this leaves only the id and name columns.
                     NOTE(review): the pattern contains a double bar as written;
                     verify whether a single bar was intended. -->
                <operator name="remove_all_except_id_name" class="FeatureNameFilter">
                    <parameter key="except_features_with_name" value="id||name"/>
                    <parameter key="filter_special_features" value="true"/>
                    <parameter key="skip_features_with_name" value=".*"/>
                </operator>
            </operator>
            <!-- Step 4: select ExampleSet number 2, presumably the copy that
                 was not reduced above; confirm against the IOSelector docs. -->
            <operator name="IOSelector" class="IOSelector">
                <parameter key="io_object" value="ExampleSet"/>
                <parameter key="select_which" value="2"/>
            </operator>
            <!-- Step 5: drop the "name" attribute from the set to be scored. -->
            <operator name="remove_name" class="FeatureNameFilter">
                <parameter key="filter_special_features" value="true"/>
                <parameter key="skip_features_with_name" value="name"/>
            </operator>
            <!-- Step 6: apply the loaded model; the model is kept. -->
            <operator name="ModelApplier (2)" class="ModelApplier">
                <list key="application_parameters">
                </list>
                <parameter key="keep_model" value="true"/>
            </operator>
            <!-- Step 7: join the example sets, presumably the scored set with
                 the id/name set on the generated id. The post warns that the
                 data should not be too large because this join is used. -->
            <operator name="ExampleSetJoin" class="ExampleSetJoin">
            </operator>
            <!-- Step 8: give "name" the id role again. -->
            <operator name="restore_old_id" class="ChangeAttributeRole">
                <parameter key="name" value="name"/>
                <parameter key="target_role" value="id"/>
            </operator>
            <!-- Writes the result to debug4.csv using "," as column separator. -->
            <operator name="CSVExampleSetWriter" class="CSVExampleSetWriter">
                <parameter key="column_separator" value=","/>
                <parameter key="csv_file" value="debug4.csv"/>
            </operator>
        </operator>
    </operator>

    hope this was helpful

    Steffen

    PS: Setting the old id to "inactive" does not work (as expected)
    PPS: Note that there is an attachment function. It is hidden under "additional options" in the post-reply dialog, so you can attach XML files and saved .csv files (of moderate size) to your post.
  • stereotaxon Member Posts: 10 Contributor II
    Thanks for the workaround. 
    -Mike
Sign In or Register to comment.