The RapidMiner community is on read-only mode until further notice. Technical support via cases will continue to work as is. For any urgent licensing related requests from Students/Faculty members, please use the Altair academic forum here.
Parsing Latitudes and Longitudes
Hello,
I am using RapidMiner Studio 9.6.000 on Ubuntu 18.04.
I have latitude and longitude values expressed as e.g. 50.833.333, where 50 is the degrees, 833 is the minutes and 333 is the seconds. By default the values are loaded as nominal, and I am not able to use Format Numbers or Parse Numbers on them.
Can anyone please help me understand how I can use these values to plot maps in visualization?
Tagged:
0
Best Answer
-
Shourya Member Posts: 5 Newbie
I have solved the problem using R and I am attaching the working solution here.
1
Answers
<context>
  <!-- Process context: no inputs, outputs, or macros are declared. -->
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process">
  <parameter key="logverbosity" value="init"/>
  <parameter key="random_seed" value="-1"/>
  <parameter key="send_mail" value="never"/>
  <parameter key="notification_email" value=""/>
  <parameter key="process_duration_for_mail" value="30"/>
  <parameter key="encoding" value="SYSTEM"/>
  <process expanded="true">
    <!-- Sample data: three places whose coordinates are written as
         degrees.minutes.seconds.fraction.hemisphere, e.g. 37.58.01.8.S.
         Rows are separated by an encoded line feed so that each record is parsed
         as its own example. The N/S values are latitudes and the E/W values are
         longitudes, so the header names the columns lat and long in that order. -->
    <operator activated="true" class="utility:create_exampleset" compatibility="9.6.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="45" y="34">
      <parameter key="generator_type" value="comma separated text"/>
      <parameter key="number_of_examples" value="100"/>
      <parameter key="use_stepsize" value="false"/>
      <list key="function_descriptions"/>
      <parameter key="add_id_attribute" value="false"/>
      <list key="numeric_series_configuration"/>
      <list key="date_series_configuration"/>
      <list key="date_series_configuration (interval)"/>
      <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
      <parameter key="time_zone" value="SYSTEM"/>
      <parameter key="input_csv_text" value="id,place,lat,long&#10;1,Cardinia Reservoir Emerald VIC 3782,37.58.01.8.S,145.25.02.5.E&#10;2,French Island VIC 3921,38.20.48.5.S,145.20.56.2.E&#10;3,1020 Studewood St Houston TX 77008 United States,29.47.22.1.N,95.23.15.3.W"/>
      <parameter key="column_separator" value=","/>
      <parameter key="parse_all_as_nominal" value="false"/>
      <parameter key="decimal_point_character" value="."/>
      <parameter key="trim_attribute_names" value="true"/>
    </operator>
    <!-- Converts the nominal lat/long strings to signed decimal degrees.
         The *_deg, *_min, *_sec and *_msec attributes hold the four numeric fields
         as integers; *_sign is -1 for the W (longitude) or S (latitude) hemisphere
         and 1 otherwise. The final value is
           sign * (deg + min/60 + seconds/3600)
         where seconds is parsed together with its decimal fraction (01.8 is read
         as 1.8 seconds), so the fraction is scaled correctly for any number of
         digits and carries the same sign as the rest of the value.
         Double quotes inside the expressions are written as &quot; because the
         expressions live inside double-quoted XML attribute values. -->
    <operator activated="true" class="generate_attributes" compatibility="9.6.000" expanded="true" height="82" name="Generate Attributes" width="90" x="179" y="34">
      <list key="function_descriptions">
        <parameter key="dd_long_deg" value="parse(replaceAll(long,&quot;^([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
        <parameter key="dd_long_min" value="parse(replaceAll(long,&quot;^[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
        <parameter key="dd_long_sec" value="parse(replaceAll(long,&quot;^[0-9]+\\.[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
        <parameter key="dd_long_msec" value="parse(replaceAll(long,&quot;^[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
        <parameter key="dd_long_sign" value="if(matches(long,&quot;^[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+)\\.W.*$&quot;),-1,1)"/>
        <parameter key="dd_long" value="dd_long_sign*(dd_long_deg + (dd_long_min/60.0) + (parse(replaceAll(long,&quot;^[0-9]+\\.[0-9]+\\.([0-9]+\\.[0-9]+)\\..*$&quot;,&quot;$1&quot;))/3600.0))"/>
        <parameter key="dd_lat_deg" value="parse(replaceAll(lat,&quot;^([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
        <parameter key="dd_lat_min" value="parse(replaceAll(lat,&quot;^[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
        <parameter key="dd_lat_sec" value="parse(replaceAll(lat,&quot;^[0-9]+\\.[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
        <parameter key="dd_lat_msec" value="parse(replaceAll(lat,&quot;^[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+)\\..*$&quot;,&quot;$1&quot;))"/>
        <parameter key="dd_lat_sign" value="if(matches(lat,&quot;^[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+)\\.S.*$&quot;),-1,1)"/>
        <parameter key="dd_lat" value="dd_lat_sign*(dd_lat_deg + (dd_lat_min/60.0) + (parse(replaceAll(lat,&quot;^[0-9]+\\.[0-9]+\\.([0-9]+\\.[0-9]+)\\..*$&quot;,&quot;$1&quot;))/3600.0))"/>
      </list>
      <parameter key="keep_all" value="true"/>
    </operator>
    <!-- Adds copies of dd_long and dd_lat multiplied by 1000.
         NOTE(review): presumably this is meant to expose more decimal digits when
         inspecting the results; confirm the intent with the author. -->
    <operator activated="true" class="generate_attributes" compatibility="9.6.000" expanded="true" height="82" name="Check Precision" width="90" x="313" y="34">
      <list key="function_descriptions">
        <parameter key="dd_long_x" value="dd_long*1000"/>
        <parameter key="dd_lat_x" value="dd_lat*1000"/>
      </list>
      <parameter key="keep_all" value="true"/>
    </operator>
    <connect from_op="Create ExampleSet" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="Check Precision" to_port="example set input"/>
    <connect from_op="Check Precision" from_port="example set output" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
  </process>
</operator>
</process>
Also note that if you incorporate milliseconds, RapidMiner seems to be losing precision, which will translate into meters of difference.
JacobI'm the author of the GeoProcessing extension, feel free to ask me. It doesn't have an operator or conversion for this format, though.
Regards,
Balázs