Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
Question about polynomial regression
Hi there:
I'm a newcomer here. I really need some help with polynomial regression.
This is the data I've given to polynomial regression.
There are 201 data points; the relationship between x and y is: y = 3*x^2 - 2*x + 8
And this is what I got from polynomial regression:
Really confused, please help me.
I'm a newcomer here. I really need some help with polynomial regression.
This is the data I've given to polynomial regression.
x | y |
-100 | 30208 |
-99 | 29609 |
-98 | 29016 |
-97 | 28429 |
-96 | 27848 |
-95 | 27273 |
-94 | 26704 |
-93 | 26141 |
-92 | 25584 |
-91 | 25033 |
-90 | 24488 |
-89 | 23949 |
-88 | 23416 |
-87 | 22889 |
-86 | 22368 |
-85 | 21853 |
-84 | 21344 |
-83 | 20841 |
-82 | 20344 |
-81 | 19853 |
-80 | 19368 |
-79 | 18889 |
-78 | 18416 |
-77 | 17949 |
-76 | 17488 |
-75 | 17033 |
-74 | 16584 |
-73 | 16141 |
-72 | 15704 |
-71 | 15273 |
-70 | 14848 |
-69 | 14429 |
-68 | 14016 |
-67 | 13609 |
-66 | 13208 |
-65 | 12813 |
-64 | 12424 |
-63 | 12041 |
-62 | 11664 |
-61 | 11293 |
-60 | 10928 |
-59 | 10569 |
-58 | 10216 |
-57 | 9869 |
-56 | 9528 |
-55 | 9193 |
-54 | 8864 |
-53 | 8541 |
-52 | 8224 |
-51 | 7913 |
-50 | 7608 |
-49 | 7309 |
-48 | 7016 |
-47 | 6729 |
-46 | 6448 |
-45 | 6173 |
-44 | 5904 |
-43 | 5641 |
-42 | 5384 |
-41 | 5133 |
-40 | 4888 |
-39 | 4649 |
-38 | 4416 |
-37 | 4189 |
-36 | 3968 |
-35 | 3753 |
-34 | 3544 |
-33 | 3341 |
-32 | 3144 |
-31 | 2953 |
-30 | 2768 |
-29 | 2589 |
-28 | 2416 |
-27 | 2249 |
-26 | 2088 |
-25 | 1933 |
-24 | 1784 |
-23 | 1641 |
-22 | 1504 |
-21 | 1373 |
-20 | 1248 |
-19 | 1129 |
-18 | 1016 |
-17 | 909 |
-16 | 808 |
-15 | 713 |
-14 | 624 |
-13 | 541 |
-12 | 464 |
-11 | 393 |
-10 | 328 |
-9 | 269 |
-8 | 216 |
-7 | 169 |
-6 | 128 |
-5 | 93 |
-4 | 64 |
-3 | 41 |
-2 | 24 |
-1 | 13 |
0 | 8 |
1 | 9 |
2 | 16 |
3 | 29 |
4 | 48 |
5 | 73 |
6 | 104 |
7 | 141 |
8 | 184 |
9 | 233 |
10 | 288 |
11 | 349 |
12 | 416 |
13 | 489 |
14 | 568 |
15 | 653 |
16 | 744 |
17 | 841 |
18 | 944 |
19 | 1053 |
20 | 1168 |
21 | 1289 |
22 | 1416 |
23 | 1549 |
24 | 1688 |
25 | 1833 |
26 | 1984 |
27 | 2141 |
28 | 2304 |
29 | 2473 |
30 | 2648 |
31 | 2829 |
32 | 3016 |
33 | 3209 |
34 | 3408 |
35 | 3613 |
36 | 3824 |
37 | 4041 |
38 | 4264 |
39 | 4493 |
40 | 4728 |
41 | 4969 |
42 | 5216 |
43 | 5469 |
44 | 5728 |
45 | 5993 |
46 | 6264 |
47 | 6541 |
48 | 6824 |
49 | 7113 |
50 | 7408 |
51 | 7709 |
52 | 8016 |
53 | 8329 |
54 | 8648 |
55 | 8973 |
56 | 9304 |
57 | 9641 |
58 | 9984 |
59 | 10333 |
60 | 10688 |
61 | 11049 |
62 | 11416 |
63 | 11789 |
64 | 12168 |
65 | 12553 |
66 | 12944 |
67 | 13341 |
68 | 13744 |
69 | 14153 |
70 | 14568 |
71 | 14989 |
72 | 15416 |
73 | 15849 |
74 | 16288 |
75 | 16733 |
76 | 17184 |
77 | 17641 |
78 | 18104 |
79 | 18573 |
80 | 19048 |
81 | 19529 |
82 | 20016 |
83 | 20509 |
84 | 21008 |
85 | 21513 |
86 | 22024 |
87 | 22541 |
88 | 23064 |
89 | 23593 |
90 | 24128 |
91 | 24669 |
92 | 25216 |
93 | 25769 |
94 | 26328 |
95 | 26893 |
96 | 27464 |
97 | 28041 |
98 | 28624 |
99 | 29213 |
100 | 29808 |
There are 201 data points; the relationship between x and y is: y = 3*x^2 - 2*x + 8
And this is what I got from polynomial regression:
2.998 * x ^ 2.000 + 22.316
Really confused, please help me.
<?xml version="1.0" encoding="UTF-8"?>
<!-- Process fragment containing one Read Excel operator that imports the x/y table from a local workbook. -->
<process version="10.3.001">
  <operator activated="true"
            class="read_excel"
            compatibility="10.3.001"
            expanded="true"
            height="68"
            name="Read Excel"
            width="90"
            x="45"
            y="442">
    <!-- Which workbook, sheet and starting cell to import. -->
    <parameter key="excel_file" value="C:\Users\Administrator\Desktop\test1.xlsx"/>
    <parameter key="sheet_selection" value="sheet number"/>
    <parameter key="sheet_number" value="1"/>
    <parameter key="imported_cell_range" value="A1"/>
    <parameter key="encoding" value="SYSTEM"/>

    <!-- Row 1 of the sheet supplies the attribute names. -->
    <parameter key="use_header_row" value="true"/>
    <parameter key="header_row" value="1"/>
    <parameter key="first_row_as_names" value="true"/>
    <list key="annotations"/>

    <!-- Parsing settings; the locale value is stored as a localized display name. -->
    <parameter key="date_format" value=""/>
    <parameter key="time_zone" value="SYSTEM"/>
    <parameter key="locale" value="英文 (美国)"/>
    <parameter key="read_all_values_as_polynominal" value="false"/>

    <!-- Column 0 is named x and column 1 is named y; both entries carry the "integer" type token. -->
    <list key="data_set_meta_data_information">
      <parameter key="0" value="x.true.integer.attribute"/>
      <parameter key="1" value="y.true.integer.attribute"/>
    </list>
    <parameter key="read_not_matching_values_as_missings" value="false"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Process fragment containing one Select Attributes operator configured to include a subset of attributes. -->
<process version="10.3.001">
  <operator activated="true"
            class="blending:select_attributes"
            compatibility="10.3.001"
            expanded="true"
            height="82"
            name="Select Attributes"
            width="90"
            x="179"
            y="442">
    <parameter key="type" value="include attributes"/>
    <parameter key="attribute_filter_type" value="a subset"/>
    <parameter key="select_attribute" value=""/>
    <!-- The subset lists x and y, joined by the separator character stored in the value. -->
    <parameter key="select_subset" value="x␞y"/>
    <parameter key="also_apply_to_special_attributes_(id,_label..)" value="true"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Process fragment containing one Set Role operator. -->
<process version="10.3.001">
  <operator activated="true"
            class="blending:set_role"
            compatibility="10.3.001"
            expanded="true"
            height="82"
            name="Set Role"
            width="90"
            x="313"
            y="442">
    <!-- Assigns the role "label" to attribute y. -->
    <list key="set_roles">
      <parameter key="y" value="label"/>
    </list>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Process fragment containing one Split Data operator with two partitions. -->
<process version="10.3.001">
  <operator activated="true"
            class="split_data"
            compatibility="10.1.003"
            expanded="true"
            height="82"
            name="Split Data"
            origin="GENERATED_TUTORIAL"
            width="90"
            x="715"
            y="85">
    <!-- Two partitions with ratios 0.7 and 0.3; element order defines partition order. -->
    <enumeration key="partitions">
      <parameter key="ratio" value="0.7"/>
      <parameter key="ratio" value="0.3"/>
    </enumeration>
    <parameter key="sampling_type" value="automatic"/>
    <!-- A local seed value is stored, but use_local_random_seed is false. -->
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Process fragment containing the asker's Polynomial Regression operator settings. -->
<process version="10.3.001">
  <operator activated="true"
            class="polynomial_regression"
            compatibility="10.3.001"
            expanded="true"
            height="82"
            name="Polynomial Regression"
            width="90"
            x="849"
            y="85">
    <parameter key="max_iterations" value="5000"/>
    <!-- NOTE(review): the accepted answer in this thread names replication_factor as the key parameter; it is 1 here. -->
    <parameter key="replication_factor" value="1"/>
    <parameter key="max_degree" value="5"/>
    <!-- Coefficient bounds supplied to the operator. -->
    <parameter key="min_coefficient" value="-100.0"/>
    <parameter key="max_coefficient" value="100.0"/>
    <!-- A local seed value is stored, but use_local_random_seed is false. -->
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
  </operator>
</process>
0
Best Answer
-
rjones13 Member Posts: 198 Unicorn
Hi @FredLee,
The key parameter is the replication factor, which allows x to appear more than once in the equation (for example, as both an x^2 term and an x term). Please see the following process, which gives a pretty decent result with only 201 points.
Best,
Roland
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Complete example process from the answer: Open File, Read CSV, Set Role,
  Polynomial Regression, Apply Model, Performance.
  The model and the performance vector are delivered to result ports 1 and 2.
-->
<process version="10.3.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="10.3.001" expanded="true" name="Process">
    <!-- Root operator settings. -->
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true">

      <!-- Opens the repository blob entry that holds the x/y data as text. -->
      <operator activated="true" class="open_file" compatibility="10.3.001" expanded="true"
                height="68" name="Open File" width="90" x="45" y="34">
        <parameter key="resource_type" value="repository blob entry"/>
        <parameter key="repository_entry" value="//Community-Content/poly.txt"/>
      </operator>

      <!-- Parses the file as delimited text; columns are separated by the pattern \s+ and row 1 is the header. -->
      <operator activated="true" class="read_csv" compatibility="10.3.001" expanded="true"
                height="68" name="Read CSV" width="90" x="179" y="34">
        <parameter key="column_separators" value="\s+"/>
        <parameter key="trim_lines" value="false"/>
        <parameter key="multiline_text" value="false"/>
        <parameter key="use_quotes" value="true"/>
        <!-- The quote character is a double quote; it must be written as an entity inside a double-quoted attribute. -->
        <parameter key="quotes_character" value="&quot;"/>
        <parameter key="escape_character" value="\"/>
        <parameter key="skip_comments" value="false"/>
        <parameter key="comment_characters" value="#"/>
        <parameter key="starting_row" value="1"/>
        <parameter key="parse_numbers" value="true"/>
        <parameter key="decimal_character" value="."/>
        <parameter key="grouped_digits" value="false"/>
        <parameter key="grouping_character" value=","/>
        <parameter key="infinity_representation" value=""/>
        <parameter key="date_format" value=""/>
        <parameter key="use_header_row" value="true"/>
        <parameter key="header_row" value="1"/>
        <parameter key="first_row_as_names" value="true"/>
        <list key="annotations"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="English (United States)"/>
        <parameter key="encoding" value="UTF-8"/>
        <parameter key="read_all_values_as_polynominal" value="false"/>
        <list key="data_set_meta_data_information"/>
        <parameter key="read_not_matching_values_as_missings" value="true"/>
      </operator>

      <!-- Assigns the role "label" to attribute y. -->
      <operator activated="true" class="blending:set_role" compatibility="10.3.001" expanded="true"
                height="82" name="Set Role" width="90" x="313" y="34">
        <list key="set_roles">
          <parameter key="y" value="label"/>
        </list>
      </operator>

      <!--
        Settings recommended in the answer: replication_factor 2, max_degree 2,
        coefficients bounded to the range from -8.0 to 8.0, and 50000 iterations.
        The coefficients of the relationship stated in the question (3, -2, 8) fall inside that range.
      -->
      <operator activated="true" class="polynomial_regression" compatibility="10.3.001" expanded="true"
                height="82" name="Polynomial Regression" width="90" x="447" y="34">
        <parameter key="max_iterations" value="50000"/>
        <parameter key="replication_factor" value="2"/>
        <parameter key="max_degree" value="2"/>
        <parameter key="min_coefficient" value="-8.0"/>
        <parameter key="max_coefficient" value="8.0"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>

      <!-- Applies the model to the example set passed through by Polynomial Regression (see the connections below). -->
      <operator activated="true" class="apply_model" compatibility="10.3.001" expanded="true"
                height="82" name="Apply Model" width="90" x="581" y="34">
        <list key="application_parameters"/>
      </operator>

      <!-- Only root_mean_squared_error and absolute_error are enabled as criteria. -->
      <operator activated="true" class="performance_regression" compatibility="10.3.001" expanded="true"
                height="82" name="Performance" width="90" x="514" y="187">
        <parameter key="main_criterion" value="first"/>
        <parameter key="root_mean_squared_error" value="true"/>
        <parameter key="absolute_error" value="true"/>
        <parameter key="relative_error" value="false"/>
        <parameter key="relative_error_lenient" value="false"/>
        <parameter key="relative_error_strict" value="false"/>
        <parameter key="normalized_absolute_error" value="false"/>
        <parameter key="root_relative_squared_error" value="false"/>
        <parameter key="squared_error" value="false"/>
        <parameter key="correlation" value="false"/>
        <parameter key="squared_correlation" value="false"/>
        <parameter key="prediction_average" value="false"/>
        <parameter key="spearman_rho" value="false"/>
        <parameter key="kendall_tau" value="false"/>
        <parameter key="skip_undefined_labels" value="true"/>
        <parameter key="use_example_weights" value="true"/>
      </operator>

      <!-- Data flow between the operators and out to the result ports. -->
      <connect from_op="Open File" from_port="file" to_op="Read CSV" to_port="file"/>
      <connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Polynomial Regression" to_port="training set"/>
      <connect from_op="Polynomial Regression" from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_op="Polynomial Regression" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
      <connect from_op="Apply Model" from_port="model" to_port="result 1"/>
      <connect from_op="Performance" from_port="performance" to_port="result 2"/>

      <!-- Canvas layout hints for the process ports. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
1
Answers
I've tested what you gave to me.
The result is very good.
And I know that if you want to get a good result you need to modify the parameters.
It really helps a lot.