Due to recent updates, all users are required to create an Altair One account to login to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to login to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you login, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
Continuous learning or auto machine learning
Hi All, I have a trained text mining model. I want to add more example sets to the training, but I do not want to retrain everything from scratch. Can RapidMiner provide continuous learning or auto machine learning, so that we can train the existing model with more example sets and classifications?
Thanks!
Tagged:
0
Best Answers
-
David_A Administrator, Moderator, Employee, RMResearcher, Member Posts: 297 RM Research
Hi @wang0581, you can use the Update Model operator to add new training data to your model. Unfortunately this mostly works with very simple algorithms like Naive Bayes or k-NN, and with Deep Neural Networks. Below you find the tutorial process from the Deep Learning extension. I hope this helps in your situation.
Best,
David
----------------------------------
<?xml version="1.0" encoding="UTF-8"?><process version="9.5.001"><context><input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<!-- Data preparation: retrieves the Abalone sample, marks 'Age' as the label and
     dummy-codes the attributes, then hands the result to port "out 1". -->
<operator activated="true" class="subprocess" compatibility="9.5.001" expanded="true"
          height="82" name="Subprocess" origin="GENERATED_TUTORIAL" width="90" x="45" y="187">
  <process expanded="true">
    <!-- Step 1: read the sample data set from the repository. -->
    <operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true"
              height="68" name="Retrieve Abalone" origin="GENERATED_TUTORIAL" width="90" x="112" y="136">
      <parameter key="repository_entry" value="//Samples/Deep Learning/data/Abalone"/>
      <description align="center" color="transparent" colored="false" width="126">Loading data</description>
    </operator>
    <!-- Step 2: give the attribute 'Age' the role "label". -->
    <operator activated="true" class="set_role" compatibility="9.5.001" expanded="true"
              height="82" name="Set Role" origin="GENERATED_TUTORIAL" width="90" x="313" y="136">
      <parameter key="attribute_name" value="Age"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
      <description align="center" color="transparent" colored="false" width="126">Choosing 'Age' as the label.</description>
    </operator>
    <!-- Step 3: nominal-to-numerical conversion with the attribute filter set to "all"
         and coding type "dummy coding". -->
    <operator activated="true" class="nominal_to_numerical" compatibility="9.5.001" expanded="true"
              height="103" name="Nominal to Numerical" origin="GENERATED_TUTORIAL" width="90" x="514" y="136">
      <parameter key="return_preprocessing_model" value="false"/>
      <parameter key="create_view" value="false"/>
      <parameter key="attribute_filter_type" value="all"/>
      <parameter key="attribute" value=""/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="nominal"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="file_path"/>
      <parameter key="block_type" value="single_value"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="single_value"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="false"/>
      <parameter key="coding_type" value="dummy coding"/>
      <parameter key="use_comparison_groups" value="false"/>
      <list key="comparison_groups"/>
      <parameter key="unexpected_value_handling" value="all 0 and warning"/>
      <parameter key="use_underscore_in_name" value="false"/>
    </operator>
    <!-- Linear chain: Retrieve Abalone, Set Role, Nominal to Numerical, subprocess output. -->
    <connect from_op="Retrieve Abalone" from_port="output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Numerical" to_port="example set input"/>
    <connect from_op="Nominal to Numerical" from_port="example set output" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    <!-- Canvas annotations; the text content is kept on one line because whitespace inside it is data. -->
    <description align="center" color="purple" colored="true" height="222" resized="true" width="186" x="264" y="48">We want to estimate the 'Age' of sea shells through measuring it.</description>
    <description align="center" color="purple" colored="true" height="222" resized="true" width="186" x="466" y="47">Neural networks only work on numerical data, therefore we need to convert all attributes to numericals.</description>
  </process>
  <description align="center" color="transparent" colored="false" width="126">Load and prepare data</description>
</operator>
<!-- Splits the prepared data into three partitions. In the enclosing process,
     partition 1 feeds Deep Learning, partition 2 feeds Update Model and
     partition 3 feeds Multiply. -->
<operator activated="true" class="split_data" compatibility="9.5.001" expanded="true"
          height="124" name="Split Data" origin="GENERATED_TUTORIAL" width="90" x="179" y="187">
  <!-- NOTE(review): the three ratios below add up to 1.2 rather than 1.0. Confirm
       whether Split Data rescales them or whether different values were intended. -->
  <enumeration key="partitions">
    <parameter key="ratio" value="0.2"/>
    <parameter key="ratio" value="0.8"/>
    <parameter key="ratio" value="0.2"/>
  </enumeration>
  <parameter key="sampling_type" value="automatic"/>
  <parameter key="use_local_random_seed" value="false"/>
  <parameter key="local_random_seed" value="1992"/>
  <description align="center" color="transparent" colored="false" width="126">Split data into two training and one test sets.</description>
</operator>
<!-- Duplicates its input. In the enclosing process, "output 1" goes to Apply Model
     and "output 2" goes to Apply Model (2). -->
<operator activated="true" class="multiply" compatibility="9.5.001" expanded="true"
          height="103" name="Multiply" origin="GENERATED_TUTORIAL" width="90" x="313" y="340">
  <description align="center" color="transparent" colored="false" width="126">Copy the test set to use the idential data for both model performance checks.</description>
</operator>
<!-- Initial training: sequential neural network from the Deep Learning extension.
     The nested process defines the layer stack (8 ReLU neurons, then 1 identity neuron). -->
<operator activated="true" class="deeplearning:dl4j_sequential_neural_network" compatibility="0.9.001"
          expanded="true" height="103" name="Deep Learning" origin="GENERATED_TUTORIAL"
          width="90" x="313" y="85">
  <!-- Loss and training schedule. -->
  <parameter key="loss_function" value="Mean Squared Error (Linear Regression)"/>
  <parameter key="epochs" value="50"/>
  <parameter key="use_miniBatch" value="true"/>
  <parameter key="batch_size" value="4"/>
  <!-- Updater selection and its hyper-parameters. -->
  <parameter key="updater" value="Adam"/>
  <parameter key="learning_rate" value="0.005"/>
  <parameter key="momentum" value="0.9"/>
  <parameter key="rho" value="0.95"/>
  <parameter key="epsilon" value="1.0E-6"/>
  <parameter key="beta1" value="0.9"/>
  <parameter key="beta2" value="0.999"/>
  <parameter key="RMSdecay" value="0.95"/>
  <!-- Initialization and regularization (regularization is switched off). -->
  <parameter key="weight_initialization" value="Xavier"/>
  <parameter key="bias_initialization" value="0.0"/>
  <parameter key="use_regularization" value="false"/>
  <parameter key="l1_strength" value="0.1"/>
  <parameter key="l2_strength" value="0.1"/>
  <!-- Optimization, network type and logging. -->
  <parameter key="optimization_method" value="Stochastic Gradient Descent"/>
  <parameter key="backpropagation" value="Standard"/>
  <parameter key="backpropagation_length" value="50"/>
  <parameter key="infer_input_shape" value="true"/>
  <parameter key="network_type" value="Simple Neural Network"/>
  <parameter key="log_each_epoch" value="true"/>
  <parameter key="epochs_per_log" value="10"/>
  <parameter key="use_local_random_seed" value="false"/>
  <parameter key="local_random_seed" value="1992"/>
  <process expanded="true">
    <!-- First dense layer: 8 neurons with ReLU activation. -->
    <operator activated="true" class="deeplearning:dl4j_dense_layer" compatibility="0.9.001"
              expanded="true" height="68" name="Add Fully-Connected Layer" origin="GENERATED_TUTORIAL"
              width="90" x="112" y="85">
      <parameter key="number_of_neurons" value="8"/>
      <parameter key="activation_function" value="ReLU (Rectified Linear Unit)"/>
      <parameter key="use_dropout" value="false"/>
      <parameter key="dropout_rate" value="0.25"/>
      <parameter key="overwrite_networks_weight_initialization" value="false"/>
      <parameter key="weight_initialization" value="Normal"/>
      <parameter key="overwrite_networks_bias_initialization" value="false"/>
      <parameter key="bias_initialization" value="0.0"/>
    </operator>
    <!-- Second dense layer: a single neuron with identity activation. -->
    <operator activated="true" class="deeplearning:dl4j_dense_layer" compatibility="0.9.001"
              expanded="true" height="68" name="Add Fully-Connected Layer (2)" origin="GENERATED_TUTORIAL"
              width="90" x="313" y="85">
      <parameter key="number_of_neurons" value="1"/>
      <parameter key="activation_function" value="None (identity)"/>
      <parameter key="use_dropout" value="false"/>
      <parameter key="dropout_rate" value="0.25"/>
      <parameter key="overwrite_networks_weight_initialization" value="false"/>
      <parameter key="weight_initialization" value="Normal"/>
      <parameter key="overwrite_networks_bias_initialization" value="false"/>
      <parameter key="bias_initialization" value="0.0"/>
    </operator>
    <!-- The layerArchitecture port is threaded through both layers in order. -->
    <connect from_port="layerArchitecture" to_op="Add Fully-Connected Layer" to_port="layerArchitecture"/>
    <connect from_op="Add Fully-Connected Layer" from_port="layerArchitecture" to_op="Add Fully-Connected Layer (2)" to_port="layerArchitecture"/>
    <connect from_op="Add Fully-Connected Layer (2)" from_port="layerArchitecture" to_port="layerArchitecture"/>
    <portSpacing port="source_layerArchitecture" spacing="0"/>
    <portSpacing port="sink_layerArchitecture" spacing="0"/>
  </process>
</operator>
<!-- Applies the model produced by Deep Learning to the first copy delivered by Multiply
     (wiring is defined by the connect elements of the enclosing process). -->
<operator activated="true" class="apply_model" compatibility="9.5.001" expanded="true"
          height="82" name="Apply Model" origin="GENERATED_TUTORIAL" width="90" x="447" y="85">
  <list key="application_parameters"/>
  <parameter key="create_view" value="false"/>
</operator>
<!-- Regression performance of the initially trained model. Of the criteria listed
     below only relative_error is enabled. -->
<operator activated="true" class="performance_regression" compatibility="9.5.001" expanded="true"
          height="82" name="Performance" origin="GENERATED_TUTORIAL" width="90" x="648" y="85">
  <parameter key="main_criterion" value="first"/>
  <!-- Criterion switches. -->
  <parameter key="root_mean_squared_error" value="false"/>
  <parameter key="absolute_error" value="false"/>
  <parameter key="relative_error" value="true"/>
  <parameter key="relative_error_lenient" value="false"/>
  <parameter key="relative_error_strict" value="false"/>
  <parameter key="normalized_absolute_error" value="false"/>
  <parameter key="root_relative_squared_error" value="false"/>
  <parameter key="squared_error" value="false"/>
  <parameter key="correlation" value="false"/>
  <parameter key="squared_correlation" value="false"/>
  <parameter key="prediction_average" value="false"/>
  <parameter key="spearman_rho" value="false"/>
  <parameter key="kendall_tau" value="false"/>
  <!-- Handling of labels and example weights. -->
  <parameter key="skip_undefined_labels" value="true"/>
  <parameter key="use_example_weights" value="true"/>
  <description align="center" color="transparent" colored="false" width="126">Calculate model performance</description>
</operator>
<!-- Update step with no explicit parameters. In the enclosing process it receives the
     model passed through Apply Model and the second partition of Split Data. -->
<operator activated="true" class="update_model" compatibility="9.5.001" expanded="true"
          height="82" name="Update Model" origin="GENERATED_TUTORIAL" width="90" x="648" y="289"/>
<!-- Applies the model coming out of Update Model to the second copy delivered by Multiply
     (wiring is defined by the connect elements of the enclosing process). -->
<operator activated="true" class="apply_model" compatibility="9.5.001" expanded="true"
          height="82" name="Apply Model (2)" origin="GENERATED_TUTORIAL" width="90" x="782" y="340">
  <list key="application_parameters"/>
  <parameter key="create_view" value="false"/>
</operator>
<!-- Regression performance of the updated model. The parameter values are the same
     as in the "Performance" operator: only relative_error is enabled. -->
<operator activated="true" class="performance_regression" compatibility="9.5.001" expanded="true"
          height="82" name="Performance after Update" origin="GENERATED_TUTORIAL"
          width="90" x="916" y="340">
  <parameter key="main_criterion" value="first"/>
  <!-- Criterion switches. -->
  <parameter key="root_mean_squared_error" value="false"/>
  <parameter key="absolute_error" value="false"/>
  <parameter key="relative_error" value="true"/>
  <parameter key="relative_error_lenient" value="false"/>
  <parameter key="relative_error_strict" value="false"/>
  <parameter key="normalized_absolute_error" value="false"/>
  <parameter key="root_relative_squared_error" value="false"/>
  <parameter key="squared_error" value="false"/>
  <parameter key="correlation" value="false"/>
  <parameter key="squared_correlation" value="false"/>
  <parameter key="prediction_average" value="false"/>
  <parameter key="spearman_rho" value="false"/>
  <parameter key="kendall_tau" value="false"/>
  <!-- Handling of labels and example weights. -->
  <parameter key="skip_undefined_labels" value="true"/>
  <parameter key="use_example_weights" value="true"/>
  <description align="center" color="transparent" colored="false" width="126">Calculate model performance</description>
</operator>
<!-- Top-level wiring. Element order is kept exactly as generated. -->
<!-- Prepared data goes into Split Data; its three partitions fan out. -->
<connect from_op="Subprocess" from_port="out 1" to_op="Split Data" to_port="example set"/>
<connect from_op="Split Data" from_port="partition 1" to_op="Deep Learning" to_port="training set"/>
<connect from_op="Split Data" from_port="partition 2" to_op="Update Model" to_port="example set"/>
<connect from_op="Split Data" from_port="partition 3" to_op="Multiply" to_port="input"/>
<!-- Both Apply Model operators receive a copy of the same partition. -->
<connect from_op="Multiply" from_port="output 1" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Multiply" from_port="output 2" to_op="Apply Model (2)" to_port="unlabelled data"/>
<!-- First evaluation; the model is then passed through Apply Model into Update Model. -->
<connect from_op="Deep Learning" from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Apply Model" from_port="model" to_op="Update Model" to_port="model"/>
<connect from_op="Performance" from_port="performance" to_port="result 1"/>
<!-- Second evaluation with the updated model. -->
<connect from_op="Update Model" from_port="model" to_op="Apply Model (2)" to_port="model"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance after Update" to_port="labelled data"/>
<connect from_op="Performance after Update" from_port="performance" to_port="result 2"/>
<!-- Port layout of the process canvas. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<!-- Canvas annotations; the text content is kept on one line because whitespace inside it is data. -->
<description align="center" color="yellow" colored="false" height="54" resized="true" width="220" x="34" y="40">Update a previously trained model with new data.</description>
<description align="center" color="green" colored="true" height="196" resized="true" width="474" x="282" y="40">Initial training &amp; model evaluation</description>
<description align="center" color="blue" colored="true" height="232" resized="true" width="415" x="616" y="253">Perform one more iteration step and evaluate the model again.</description>
</process>
</operator>
</process>
7