RapidMiner 9.7 is Now Available

Lots of amazing new improvements including true version control! Learn more about what's new here.

CLICK HERE TO DOWNLOAD

How to loop through all files in a directory and change macro values for each file?

asnani_sorath Member Posts: 5 Contributor I
edited December 2018 in Help

Hi, 
I am using the Linear Regression operator to train a model. I have several training and test files stored in different folders. 

For example, 
Training files are stored in:
D:/path/to/training/files/s2_merged_train.csv
D:/path/to/training/files/s3_merged_train.csv
D:/path/to/training/files/s5_merged_train.csv
....

Similarly the Testing files are stored as:
D:/path/to/testing/files/s2_merged_test.csv
D:/path/to/testing/files/s3_merged_test.csv
D:/path/to/testing/files/s5_merged_test.csv
.....
There are 271 training and 271 testing files. File names have the same pattern but they are not continuous, that is, there may be s2_merged_train.csv, s3_merged_train.csv but may not be s4_merged_train.csv. Similarly for the test files. 
I have written the following process which is working correctly for given inputs. For file names and some attribute name, I am using macros, and passing the macro values through command line.
I am also storing the resulting file and the performance vectors in files. 
I want to execute that process for all the training and testing files without inserting the macro values manually. 
After searching on the internet I found that there is a Loop Files operator in RapidMiner which can be used to solve the problem, but I am having trouble changing the macro values for each file. 
My xml code is as follows:

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- NOTE: this listing is a sequence of per-operator snippets. Each snippet has its own XML
     declaration and its own <process> root, and there are no <connect> elements, so the
     listing as a whole is not one well-formed, runnable process document. -->
<!-- Read CSV: loads the training data. %{training-file} is a macro; the batch command shown
     further down in this post sets it with -Mtraining-file=... -->
<operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read CSV" width="90" x="45" y="34">
<parameter key="csv_file" value="%{training-file}"/>
<parameter key="column_separators" value=","/>
<parameter key="trim_lines" value="false"/>
<parameter key="use_quotes" value="true"/>
<parameter key="quotes_character" value="&quot;"/>
<parameter key="escape_character" value="\"/>
<parameter key="skip_comments" value="false"/>
<parameter key="comment_characters" value="#"/>
<parameter key="parse_numbers" value="true"/>
<parameter key="decimal_character" value="."/>
<parameter key="grouped_digits" value="false"/>
<parameter key="grouping_character" value=","/>
<parameter key="date_format" value=""/>
<parameter key="first_row_as_names" value="true"/>
<list key="annotations"/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="locale" value="English (United States)"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="read_all_values_as_polynominal" value="false"/>
<list key="data_set_meta_data_information"/>
<parameter key="read_not_matching_values_as_missings" value="true"/>
<parameter key="datamanagement" value="double_array"/>
<parameter key="data_management" value="auto"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Nominal to Date: parses the "Time" attribute using the pattern yyyy-MM-dd HH:mm:ss. -->
<operator activated="true" class="nominal_to_date" compatibility="8.1.000" expanded="true" height="82" name="Nominal to Date" width="90" x="179" y="34">
<parameter key="attribute_name" value="Time"/>
<parameter key="date_type" value="date_time"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="locale" value="English (United States)"/>
<parameter key="keep_old_attribute" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Read CSV (2): same settings as Read CSV, but the file comes from the %{testing-file} macro. -->
<operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read CSV (2)" width="90" x="45" y="289">
<parameter key="csv_file" value="%{testing-file}"/>
<parameter key="column_separators" value=","/>
<parameter key="trim_lines" value="false"/>
<parameter key="use_quotes" value="true"/>
<parameter key="quotes_character" value="&quot;"/>
<parameter key="escape_character" value="\"/>
<parameter key="skip_comments" value="false"/>
<parameter key="comment_characters" value="#"/>
<parameter key="parse_numbers" value="true"/>
<parameter key="decimal_character" value="."/>
<parameter key="grouped_digits" value="false"/>
<parameter key="grouping_character" value=","/>
<parameter key="date_format" value=""/>
<parameter key="first_row_as_names" value="true"/>
<list key="annotations"/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="locale" value="English (United States)"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="read_all_values_as_polynominal" value="false"/>
<list key="data_set_meta_data_information"/>
<parameter key="read_not_matching_values_as_missings" value="true"/>
<parameter key="datamanagement" value="double_array"/>
<parameter key="data_management" value="auto"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Store: repository location taken from the %{training-repository} macro. -->
<operator activated="true" class="store" compatibility="8.1.000" expanded="true" height="68" name="Store" width="90" x="313" y="34">
<parameter key="repository_entry" value="%{training-repository}"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Set Role: gives the attribute named by %{training-role-attribute-name} the "label" role. -->
<operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role" width="90" x="447" y="34">
<parameter key="attribute_name" value="%{training-role-attribute-name}"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Linear Regression: the learner, configured with "M5 prime" feature selection. -->
<operator activated="true" class="linear_regression" compatibility="8.1.000" expanded="true" height="103" name="Linear Regression" width="90" x="581" y="34">
<parameter key="feature_selection" value="M5 prime"/>
<parameter key="alpha" value="0.05"/>
<parameter key="max_iterations" value="10"/>
<parameter key="forward_alpha" value="0.05"/>
<parameter key="backward_alpha" value="0.05"/>
<parameter key="eliminate_colinear_features" value="true"/>
<parameter key="min_tolerance" value="0.05"/>
<parameter key="use_bias" value="true"/>
<parameter key="ridge" value="1.0E-8"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Nominal to Date (2): same "Time" conversion as Nominal to Date. -->
<operator activated="true" class="nominal_to_date" compatibility="8.1.000" expanded="true" height="82" name="Nominal to Date (2)" width="90" x="179" y="289">
<parameter key="attribute_name" value="Time"/>
<parameter key="date_type" value="date_time"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="locale" value="English (United States)"/>
<parameter key="keep_old_attribute" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Store (2): repository location taken from the %{testing-repository} macro. -->
<operator activated="true" class="store" compatibility="8.1.000" expanded="true" height="68" name="Store (2)" width="90" x="313" y="289">
<parameter key="repository_entry" value="%{testing-repository}"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Set Role (2): gives the attribute named by %{testing-role-attribute-name} the "label" role. -->
<operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role (2)" width="90" x="447" y="289">
<parameter key="attribute_name" value="%{testing-role-attribute-name}"/>
<parameter key="target_role" value="label"/>
<list key="set_additional_roles"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Apply Model: no wiring is shown in this listing, so its inputs cannot be read off here. -->
<operator activated="true" class="apply_model" compatibility="8.1.000" expanded="true" height="82" name="Apply Model" width="90" x="715" y="187">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Performance: regression criteria; absolute_error is the main criterion. -->
<operator activated="true" class="performance_regression" compatibility="8.1.000" expanded="true" height="82" name="Performance" width="90" x="849" y="187">
<parameter key="main_criterion" value="absolute_error"/>
<parameter key="root_mean_squared_error" value="true"/>
<parameter key="absolute_error" value="true"/>
<parameter key="relative_error" value="false"/>
<parameter key="relative_error_lenient" value="false"/>
<parameter key="relative_error_strict" value="false"/>
<parameter key="normalized_absolute_error" value="true"/>
<parameter key="root_relative_squared_error" value="false"/>
<parameter key="squared_error" value="false"/>
<parameter key="correlation" value="false"/>
<parameter key="squared_correlation" value="false"/>
<parameter key="prediction_average" value="true"/>
<parameter key="spearman_rho" value="false"/>
<parameter key="kendall_tau" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Write as Text: output file taken from the %{performance-file} macro. -->
<operator activated="true" class="write_as_text" compatibility="8.1.000" expanded="true" height="82" name="Write as Text" width="90" x="1050" y="85">
<parameter key="result_file" value="%{performance-file}"/>
<parameter key="encoding" value="SYSTEM"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Format Numbers: attribute filter is by value type (numeric), format type "integer". -->
<operator activated="true" class="format_numbers" compatibility="8.1.000" expanded="true" height="82" name="Format Numbers" width="90" x="983" y="238">
<parameter key="attribute_filter_type" value="value_type"/>
<parameter key="attribute" value="Time"/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="numeric"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="real"/>
<parameter key="block_type" value="value_series"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_series_end"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="true"/>
<parameter key="format_type" value="integer"/>
<parameter key="locale" value="English (United States)"/>
<parameter key="use_grouping" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Write CSV: output file taken from the %{result-file} macro; append_to_file is off. -->
<operator activated="true" class="write_csv" compatibility="8.1.000" expanded="true" height="82" name="Write CSV" width="90" x="1117" y="238">
<parameter key="csv_file" value="%{result-file}"/>
<parameter key="column_separator" value=","/>
<parameter key="write_attribute_names" value="true"/>
<parameter key="quote_nominal_values" value="true"/>
<parameter key="format_date_attributes" value="true"/>
<parameter key="append_to_file" value="false"/>
<parameter key="encoding" value="SYSTEM"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Store (3): repository location taken from the %{result-repository} macro. -->
<operator activated="true" class="store" compatibility="8.1.000" expanded="true" height="68" name="Store (3)" width="90" x="1251" y="238">
<parameter key="repository_entry" value="%{result-repository}"/>
</operator>
</process>

For executing through command line, I am using the following command in Windows cmd:

C:/Softwares/RapidMiner/RapidMinerStudio/scripts>rapidminer-batch.bat "//Local Repository/processes/rp2" "-Mtraining-file=D:\ME_Thesis\Data_v2\experiments\x_prediction_time\10min_ahead\training\input_training_merged\s2_merged_train.csv" "-Mtraining-repository=//Local Repository/data/s2_train.csv" "-Mtraining-role-attribute-name=s2predicted" "-Mtesting-file=D:\ME_Thesis\Data_v2\experiments\x_prediction_time\10min_ahead\testing\input_testing_merged\s2_merged_test.csv" "-Mtesting-repository=//Local Repository/data/s2_test.csv" "-Mtesting-role-attribute-name=s2predicted" "-Mperformance-file=D:\ME_Thesis\Data_v2\experiments\x_prediction_time\10min_ahead\performance\s2_performance.res" "-Mresult-file=D:\ME_Thesis\Data_v2\experiments\x_prediction_time\10min_ahead\results\s2_result.csv" "-Mresult-repository=//Local Repository/data/s2_result.csv"

There may be 2 solutions for the mentioned problem. 
1. Use RapidMiner operators to execute the process on all the available files
2. Write a batch file and include the commands with parameters. 
I don't have any prior experience in writing Windows Batch Files. So I would prefer the first solution. 
Can anyone guide me how to use Loop operators and macros for this purpose? 
Any help would be highly appreciated. 
Thanks. 

Answers

  • Thomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761   Unicorn

    @asnani_sorath Your XML file is not valid; please open the XML view and cut and paste from there. 

  • asnani_sorath Member Posts: 5 Contributor I

    I have copied the following code from .rmp file. I hope it would be correct now.

    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.1.000">
      <!-- Single train/test run driven entirely by macros: the training branch (y=34) fits a
           linear regression, the testing branch (y=289) is scored with it, and the performance
           vector and the scored data are written to files. The <macros/> context is empty, so
           every %{...} value has to be supplied from outside the process. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">

          <!-- Training branch, part 1: read the CSV named by %{training-file}, parse "Time". -->
          <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read CSV" width="90" x="45" y="34">
            <parameter key="csv_file" value="%{training-file}"/>
            <parameter key="column_separators" value=","/>
            <parameter key="trim_lines" value="false"/>
            <parameter key="use_quotes" value="true"/>
            <parameter key="quotes_character" value="&quot;"/>
            <parameter key="escape_character" value="\"/>
            <parameter key="skip_comments" value="false"/>
            <parameter key="comment_characters" value="#"/>
            <parameter key="parse_numbers" value="true"/>
            <parameter key="decimal_character" value="."/>
            <parameter key="grouped_digits" value="false"/>
            <parameter key="grouping_character" value=","/>
            <parameter key="date_format" value=""/>
            <parameter key="first_row_as_names" value="true"/>
            <list key="annotations"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="read_all_values_as_polynominal" value="false"/>
            <list key="data_set_meta_data_information"/>
            <parameter key="read_not_matching_values_as_missings" value="true"/>
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
          </operator>
          <operator activated="true" class="nominal_to_date" compatibility="8.1.000" expanded="true" height="82" name="Nominal to Date" width="90" x="179" y="34">
            <parameter key="attribute_name" value="Time"/>
            <parameter key="date_type" value="date_time"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
            <parameter key="keep_old_attribute" value="false"/>
          </operator>

          <!-- Testing branch, part 1: same reader settings, file named by %{testing-file}. -->
          <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read CSV (2)" width="90" x="45" y="289">
            <parameter key="csv_file" value="%{testing-file}"/>
            <parameter key="column_separators" value=","/>
            <parameter key="trim_lines" value="false"/>
            <parameter key="use_quotes" value="true"/>
            <parameter key="quotes_character" value="&quot;"/>
            <parameter key="escape_character" value="\"/>
            <parameter key="skip_comments" value="false"/>
            <parameter key="comment_characters" value="#"/>
            <parameter key="parse_numbers" value="true"/>
            <parameter key="decimal_character" value="."/>
            <parameter key="grouped_digits" value="false"/>
            <parameter key="grouping_character" value=","/>
            <parameter key="date_format" value=""/>
            <parameter key="first_row_as_names" value="true"/>
            <list key="annotations"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
            <parameter key="encoding" value="SYSTEM"/>
            <parameter key="read_all_values_as_polynominal" value="false"/>
            <list key="data_set_meta_data_information"/>
            <parameter key="read_not_matching_values_as_missings" value="true"/>
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
          </operator>

          <!-- Training branch, part 2: store, mark the label attribute, fit the model. -->
          <operator activated="true" class="store" compatibility="8.1.000" expanded="true" height="68" name="Store" width="90" x="313" y="34">
            <parameter key="repository_entry" value="%{training-repository}"/>
          </operator>
          <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role" width="90" x="447" y="34">
            <parameter key="attribute_name" value="%{training-role-attribute-name}"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="linear_regression" compatibility="8.1.000" expanded="true" height="103" name="Linear Regression" width="90" x="581" y="34">
            <parameter key="feature_selection" value="M5 prime"/>
            <parameter key="alpha" value="0.05"/>
            <parameter key="max_iterations" value="10"/>
            <parameter key="forward_alpha" value="0.05"/>
            <parameter key="backward_alpha" value="0.05"/>
            <parameter key="eliminate_colinear_features" value="true"/>
            <parameter key="min_tolerance" value="0.05"/>
            <parameter key="use_bias" value="true"/>
            <parameter key="ridge" value="1.0E-8"/>
          </operator>

          <!-- Testing branch, part 2: parse "Time", store, mark the label attribute. -->
          <operator activated="true" class="nominal_to_date" compatibility="8.1.000" expanded="true" height="82" name="Nominal to Date (2)" width="90" x="179" y="289">
            <parameter key="attribute_name" value="Time"/>
            <parameter key="date_type" value="date_time"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
            <parameter key="keep_old_attribute" value="false"/>
          </operator>
          <operator activated="true" class="store" compatibility="8.1.000" expanded="true" height="68" name="Store (2)" width="90" x="313" y="289">
            <parameter key="repository_entry" value="%{testing-repository}"/>
          </operator>
          <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role (2)" width="90" x="447" y="289">
            <parameter key="attribute_name" value="%{testing-role-attribute-name}"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>

          <!-- Scoring and evaluation. -->
          <operator activated="true" class="apply_model" compatibility="8.1.000" expanded="true" height="82" name="Apply Model" width="90" x="715" y="187">
            <list key="application_parameters"/>
            <parameter key="create_view" value="false"/>
          </operator>
          <operator activated="true" class="performance_regression" compatibility="8.1.000" expanded="true" height="82" name="Performance" width="90" x="849" y="187">
            <parameter key="main_criterion" value="absolute_error"/>
            <parameter key="root_mean_squared_error" value="true"/>
            <parameter key="absolute_error" value="true"/>
            <parameter key="relative_error" value="false"/>
            <parameter key="relative_error_lenient" value="false"/>
            <parameter key="relative_error_strict" value="false"/>
            <parameter key="normalized_absolute_error" value="true"/>
            <parameter key="root_relative_squared_error" value="false"/>
            <parameter key="squared_error" value="false"/>
            <parameter key="correlation" value="false"/>
            <parameter key="squared_correlation" value="false"/>
            <parameter key="prediction_average" value="true"/>
            <parameter key="spearman_rho" value="false"/>
            <parameter key="kendall_tau" value="false"/>
            <parameter key="skip_undefined_labels" value="true"/>
            <parameter key="use_example_weights" value="true"/>
          </operator>

          <!-- Outputs: performance vector as text; scored data as CSV and repository entry. -->
          <operator activated="true" class="write_as_text" compatibility="8.1.000" expanded="true" height="82" name="Write as Text" width="90" x="1050" y="85">
            <parameter key="result_file" value="%{performance-file}"/>
            <parameter key="encoding" value="SYSTEM"/>
          </operator>
          <operator activated="true" class="format_numbers" compatibility="8.1.000" expanded="true" height="82" name="Format Numbers" width="90" x="983" y="238">
            <parameter key="attribute_filter_type" value="value_type"/>
            <parameter key="attribute" value="Time"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="numeric"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="real"/>
            <parameter key="block_type" value="value_series"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_series_end"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="true"/>
            <parameter key="format_type" value="integer"/>
            <parameter key="locale" value="English (United States)"/>
            <parameter key="use_grouping" value="false"/>
          </operator>
          <operator activated="true" class="write_csv" compatibility="8.1.000" expanded="true" height="82" name="Write CSV" width="90" x="1117" y="238">
            <parameter key="csv_file" value="%{result-file}"/>
            <parameter key="column_separator" value=","/>
            <parameter key="write_attribute_names" value="true"/>
            <parameter key="quote_nominal_values" value="true"/>
            <parameter key="format_date_attributes" value="true"/>
            <parameter key="append_to_file" value="false"/>
            <parameter key="encoding" value="SYSTEM"/>
          </operator>
          <operator activated="true" class="store" compatibility="8.1.000" expanded="true" height="68" name="Store (3)" width="90" x="1251" y="238">
            <parameter key="repository_entry" value="%{result-repository}"/>
          </operator>

          <!-- Wiring: the model goes to Apply Model's "model" port and the prepared test set
               to its "unlabelled data" port. -->
          <connect from_op="Read CSV" from_port="output" to_op="Nominal to Date" to_port="example set input"/>
          <connect from_op="Nominal to Date" from_port="example set output" to_op="Store" to_port="input"/>
          <connect from_op="Read CSV (2)" from_port="output" to_op="Nominal to Date (2)" to_port="example set input"/>
          <connect from_op="Store" from_port="through" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Linear Regression" to_port="training set"/>
          <connect from_op="Linear Regression" from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_op="Nominal to Date (2)" from_port="example set output" to_op="Store (2)" to_port="input"/>
          <connect from_op="Store (2)" from_port="through" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_op="Write as Text" to_port="input 1"/>
          <connect from_op="Performance" from_port="example set" to_op="Format Numbers" to_port="example set input"/>
          <connect from_op="Write as Text" from_port="input 1" to_port="result 1"/>
          <connect from_op="Format Numbers" from_port="example set output" to_op="Write CSV" to_port="input"/>
          <connect from_op="Write CSV" from_port="through" to_op="Store (3)" to_port="input"/>
          <connect from_op="Store (3)" from_port="through" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
  • Thomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761   Unicorn

    Ok, that works. I think you want to use the Loop Files operator to load in your training and testing data sets. This way you can use pre-defined macros like %{file_name} and so forth.

    sgenzer
  • asnani_sorath Member Posts: 5 Contributor I

    Dear Thomas, can you kindly provide an example of using that? 
    Should I use 2 Loop Files operators? 1 for training files and another for testing files? 
    And in each of my files, the label attribute is named after the file prefix: 's2predicted' in 's2_merged_train.csv' and 's2_merged_test.csv'; 's3predicted' in 's3_merged_train.csv' and 's3_merged_test.csv'; and so on. 
    How to use macros for changing these values according to each file?


    Thanks. 

  • asnani_sorath Member Posts: 5 Contributor I

    Is there no one to help? I am constantly searching and trying but no results. 

  • Thomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761   Unicorn

    @asnani_sorath next time use the '@' symbol to get someone's attention, we're all busy with work and spend our time on the Community freely. Response time varies.

     

    With respect to your process, I would do something like this. In addition, you need to define your macros, such as %{performance-file}, before you can use them. That's another reason why things break. 

     

    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
    <!-- Sketch of a Loop Files based variant: the first loop prepares the training files,
         Linear Regression is trained on its output, and the second loop prepares, scores and
         evaluates the testing files. The directory values are placeholders to be replaced.
         The %{...-role-attribute-name} and %{performance-file} macros are not defined in
         <macros/> and must be supplied from outside the process. -->
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
    <!-- Loop over the training files; enable_macros exposes the current file as %{file_name}. -->
    <operator activated="true" class="concurrency:loop_files" compatibility="8.1.000" expanded="true" height="82" name="Loop Files" width="90" x="112" y="34">
    <parameter key="directory" value="PATH-TO FILES"/>
    <parameter key="filter_type" value="regex"/>
    <parameter key="enable_macros" value="true"/>
    <process expanded="true">
    <!-- Read CSV has no csv_file parameter here; it reads the file handed over by the loop
         through its "file" input port (see the first <connect> below). -->
    <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read CSV" width="90" x="45" y="34">
    <parameter key="column_separators" value=","/>
    <list key="annotations"/>
    <parameter key="encoding" value="SYSTEM"/>
    <list key="data_set_meta_data_information"/>
    </operator>
    <operator activated="true" class="nominal_to_date" compatibility="8.1.000" expanded="true" height="82" name="Nominal to Date" width="90" x="179" y="34">
    <parameter key="attribute_name" value="Time"/>
    <parameter key="date_type" value="date_time"/>
    <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
    </operator>
    <operator activated="true" class="store" compatibility="8.1.000" expanded="true" height="68" name="Store" width="90" x="313" y="34">
    <parameter key="repository_entry" value="%{file_name}"/>
    <description align="center" color="transparent" colored="false" width="126">Path with %{file_name}</description>
    </operator>
    <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role" width="90" x="447" y="34">
    <parameter key="attribute_name" value="%{training-role-attribute-name}"/>
    <parameter key="target_role" value="label"/>
    <list key="set_additional_roles"/>
    </operator>
    <!-- Hand the current file to the reader; without this Read CSV has nothing to read. -->
    <connect from_port="file object" to_op="Read CSV" to_port="file"/>
    <connect from_op="Read CSV" from_port="output" to_op="Nominal to Date" to_port="example set input"/>
    <connect from_op="Nominal to Date" from_port="example set output" to_op="Store" to_port="input"/>
    <connect from_op="Store" from_port="through" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_port="output 1"/>
    <portSpacing port="source_file object" spacing="0"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
    </process>
    </operator>
    <!-- NOTE(review): the learner sits outside both loops, so it is not trained once per
         training file. A per-file model needs training and scoring inside one loop. -->
    <operator activated="true" class="linear_regression" compatibility="8.1.000" expanded="true" height="103" name="Linear Regression" width="90" x="581" y="34"/>
    <!-- Loop over the testing files; the trained model enters the loop on "input 1". -->
    <operator activated="true" class="concurrency:loop_files" compatibility="8.1.000" expanded="true" height="82" name="Loop Files (2)" width="90" x="782" y="187">
    <parameter key="directory" value="PATH TO DIRECTORY"/>
    <parameter key="filter_type" value="regex"/>
    <parameter key="enable_macros" value="true"/>
    <process expanded="true">
    <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read CSV (2)" width="90" x="112" y="238">
    <parameter key="column_separators" value=","/>
    <list key="annotations"/>
    <parameter key="encoding" value="SYSTEM"/>
    <list key="data_set_meta_data_information"/>
    </operator>
    <operator activated="true" class="nominal_to_date" compatibility="8.1.000" expanded="true" height="82" name="Nominal to Date (2)" width="90" x="246" y="238">
    <parameter key="attribute_name" value="Time"/>
    <parameter key="date_type" value="date_time"/>
    <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
    </operator>
    <operator activated="true" class="store" compatibility="8.1.000" expanded="true" height="68" name="Store (2)" width="90" x="380" y="238">
    <parameter key="repository_entry" value="%{file_name}"/>
    </operator>
    <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role (2)" width="90" x="514" y="238">
    <parameter key="attribute_name" value="%{testing-role-attribute-name}"/>
    <parameter key="target_role" value="label"/>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="apply_model" compatibility="8.1.000" expanded="true" height="82" name="Apply Model" width="90" x="715" y="34">
    <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="performance_regression" compatibility="8.1.000" expanded="true" height="82" name="Performance" width="90" x="715" y="238">
    <parameter key="main_criterion" value="absolute_error"/>
    <parameter key="absolute_error" value="true"/>
    <parameter key="normalized_absolute_error" value="true"/>
    <parameter key="prediction_average" value="true"/>
    </operator>
    <operator activated="true" class="write_as_text" compatibility="8.1.000" expanded="true" height="82" name="Write as Text" width="90" x="916" y="136">
    <parameter key="result_file" value="%{performance-file}"/>
    <parameter key="encoding" value="SYSTEM"/>
    </operator>
    <operator activated="true" class="format_numbers" compatibility="8.1.000" expanded="true" height="82" name="Format Numbers" width="90" x="849" y="289">
    <parameter key="attribute_filter_type" value="value_type"/>
    <parameter key="attribute" value="Time"/>
    <parameter key="include_special_attributes" value="true"/>
    <parameter key="format_type" value="integer"/>
    </operator>
    <!-- The macro reference needs both braces; "%file_name}" would be taken literally. -->
    <operator activated="true" class="write_csv" compatibility="8.1.000" expanded="true" height="82" name="Write CSV" width="90" x="983" y="289">
    <parameter key="csv_file" value="%{file_name}_results"/>
    <parameter key="column_separator" value=","/>
    <parameter key="encoding" value="SYSTEM"/>
    </operator>
    <operator activated="true" class="store" compatibility="8.1.000" expanded="true" height="68" name="Store (3)" width="90" x="1117" y="289">
    <parameter key="repository_entry" value="%{file_name}_results"/>
    </operator>
    <!-- Hand the current file to the reader, as in the training loop. -->
    <connect from_port="file object" to_op="Read CSV (2)" to_port="file"/>
    <!-- "input 1" carries the model from Linear Regression, so it belongs on the "model" port. -->
    <connect from_port="input 1" to_op="Apply Model" to_port="model"/>
    <connect from_op="Read CSV (2)" from_port="output" to_op="Nominal to Date (2)" to_port="example set input"/>
    <connect from_op="Nominal to Date (2)" from_port="example set output" to_op="Store (2)" to_port="input"/>
    <connect from_op="Store (2)" from_port="through" to_op="Set Role (2)" to_port="example set input"/>
    <!-- The prepared test data is what gets scored, so it goes to "unlabelled data". -->
    <connect from_op="Set Role (2)" from_port="example set output" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
    <connect from_op="Performance" from_port="performance" to_op="Write as Text" to_port="input 1"/>
    <connect from_op="Performance" from_port="example set" to_op="Format Numbers" to_port="example set input"/>
    <connect from_op="Format Numbers" from_port="example set output" to_op="Write CSV" to_port="input"/>
    <connect from_op="Write CSV" from_port="through" to_op="Store (3)" to_port="input"/>
    <connect from_op="Store (3)" from_port="through" to_port="output 1"/>
    <portSpacing port="source_file object" spacing="0"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="source_input 2" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
    </process>
    </operator>
    <connect from_op="Loop Files" from_port="output 1" to_op="Linear Regression" to_port="training set"/>
    <connect from_op="Linear Regression" from_port="model" to_op="Loop Files (2)" to_port="input 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    </process>
    </operator>
    </process>
    sgenzer
  • asnani_sorath Member Posts: 5 Contributor I

    Dear @Thomas_Ott, thank you very much for your response. 
    Your suggested process didn't work for me because the Linear Regression operator is out of the loop, so it won't execute on all files. 
    Never mind, I have solved the problem by generating a file with all the possible parameter values, and I have used 'Loop Examples' operator instead of 'Loop Files'. Now it is working perfectly fine. 
    Many thanks for your precious time. 
    Regards, 
    Sorath Asnani

    sgenzerThomas_Ott
Sign In or Register to comment.