Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
Parsing Latitudes and longitudes
Hello,
I am using RapidMiner Studio 9.6.000 on Ubuntu 18.04.
I have latitude and longitude values expressed as, e.g., 50.833.333, where 50 is the degrees, 833 is the minutes and 333 is the seconds. By default the values are loaded as nominal, and I am not able to convert them with Format Numbers or Parse Numbers.
Can anyone please help me understand how I can use these values to plot maps in the visualizations?
Tagged:
0
Best Answer
-
Shourya Member Posts: 5 Learner I: I have solved the problem using R, and I am attaching the working solution here.
1
Answers
<process version="9.6.000">
  <!--
    RapidMiner process: builds a three-row sample ExampleSet from inline CSV
    text, derives signed decimal-degree attributes from dot-separated
    degree.minute.second.fraction.hemisphere strings, then multiplies the
    results by 1000 so the remaining digits can be inspected.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="-1"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!--
        Sample data. Rows are separated with the character reference for a
        line feed, because a literal line break inside an attribute value is
        normalized to a space by the XML parser.
      -->
      <operator activated="true" class="utility:create_exampleset" compatibility="9.6.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="45" y="34">
        <parameter key="generator_type" value="comma separated text"/>
        <parameter key="number_of_examples" value="100"/>
        <parameter key="use_stepsize" value="false"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="input_csv_text" value="id,place,long,lat&#10;1,Cardinia Reservoir Emerald VIC 3782,37.58.01.8.S,145.25.02.5.E&#10;2,French Island VIC 3921,38.20.48.5.S,145.20.56.2.E&#10;3,1020 Studewood St Houston TX 77008 United States,29.47.22.1.N,95.23.15.3.W"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <!--
        Splits each coordinate string into degree, minute, second and
        trailing-fraction parts by regular expression, derives a sign from the
        hemisphere letter (S or W gives -1, anything else 1), and combines the
        parts into decimal degrees. Each coordinate uses its own sign
        attribute, and the fraction term sits inside the signed sum so that it
        increases the magnitude for S/W values as well.
        In the sample data the "long" column holds the N/S values and the
        "lat" column holds the E/W values; the column names are kept as given.
        NOTE(review): the 1000000.0 divisor on the *_msec term is carried over
        unchanged. If the fourth field is a decimal fraction of a second
        (e.g. 01.8 seconds) this under-weights it. Confirm the intended unit.
      -->
      <operator activated="true" class="generate_attributes" compatibility="9.6.000" expanded="true" height="82" name="Generate Attributes" width="90" x="179" y="34">
        <list key="function_descriptions">
          <parameter key="dd_long_deg" value="parse(replaceAll(long,&quot;^([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
          <parameter key="dd_long_min" value="parse(replaceAll(long,&quot;^[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
          <parameter key="dd_long_sec" value="parse(replaceAll(long,&quot;^[0-9]+\\.[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
          <parameter key="dd_long_msec" value="parse(replaceAll(long,&quot;^[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
          <parameter key="dd_long_sign" value="if(matches(long,&quot;^[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+)\\.S.*$&quot;),-1,1)"/>
          <parameter key="dd_long" value="dd_long_sign*(dd_long_deg + (dd_long_min/60.0) + (dd_long_sec/3600.0) + (dd_long_msec/1000000.0))"/>
          <parameter key="dd_lat_deg" value="parse(replaceAll(lat,&quot;^([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
          <parameter key="dd_lat_min" value="parse(replaceAll(lat,&quot;^[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
          <parameter key="dd_lat_sec" value="parse(replaceAll(lat,&quot;^[0-9]+\\.[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
          <parameter key="dd_lat_msec" value="parse(replaceAll(lat,&quot;^[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
          <parameter key="dd_lat_sign" value="if(matches(lat,&quot;^[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+)\\.W.*$&quot;),-1,1)"/>
          <parameter key="dd_lat" value="dd_lat_sign*(dd_lat_deg + (dd_lat_min/60.0) + (dd_lat_sec/3600.0) + (dd_lat_msec/1000000.0))"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <!-- Scales both results by 1000 so the lower-order digits are visible in the result view. -->
      <operator activated="true" class="generate_attributes" compatibility="9.6.000" expanded="true" height="82" name="Check Precision" width="90" x="313" y="34">
        <list key="function_descriptions">
          <parameter key="dd_long_x" value="dd_long*1000"/>
          <parameter key="dd_lat_x" value="dd_lat*1000"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Check Precision" to_port="example set input"/>
      <connect from_op="Check Precision" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Also note that if you incorporate milliseconds, RapidMiner seems to lose precision, which translates into a difference of several meters on the map.
JacobI'm the author of the GeoProcessing extension, feel free to ask me. It doesn't have an operator or conversion for this format, though.
Regards,
Balázs