Configure Data SDK for Java and Scala Jars on AWS EMR (DEPRECATED)

Following are the steps to configure Data SDK for Java and Scala jars to access the platform data on AWS EMR Jupyter notebooks.

Configure AWS EMR Cluster

  1. SSH to the cluster master node

  2. Add the following resolver to the ivy.settings.xml file if it exists, or create a new ivy.settings.xml file and copy and paste the following content into it. Replace the username and password with the repository credentials from .m2/settings.xml. (Note: The ivy.settings.xml file can be placed anywhere, as its path is provided later in the configuration.)

    <ivysettings>
    <settings defaultResolver="main" />
    <credentials host="repo.platform.here.com" realm="Artifactory Realm" username="{user}" passwd="{pass}" />
    <resolvers>
    <ibiblio name="here" m2compatible="true" root="https://repo.platform.here.com/artifactory/open-location-platform" />
    <ibiblio name="maven" root="https://repo1.maven.org/maven2" m2compatible="true" />
    <ibiblio name="bintray" root="https://dl.bintray.com/jroper/maven/" m2compatible="true" />
    <chain name="main">
    <resolver ref="here"/>
    <resolver ref="maven"/>
    <resolver ref="bintray"/>
    </chain>
    </resolvers>
    </ivysettings>
    
  3. Download the other dependencies:

    
    wget https://repo1.maven.org/maven2/org/scala-lang/modules/scala-java8-compat_2.11/0.8.0/scala-java8-compat_2.11-0.8.0.jar -P /usr/lib/spark/jars/
    
    wget https://repo1.maven.org/maven2/org/json4s/json4s-native_2.11/3.5.3/json4s-native_2.11-3.5.3.jar -P /usr/lib/spark/jars/
    
    wget https://repo1.maven.org/maven2/com/google/protobuf/protobuf-java/3.10.0/protobuf-java-3.10.0.jar -P /usr/lib/spark/jars/
    

Configure AWS EMR Jupyter Notebook

Start an AWS EMR Jupyter Notebooks instance and within the notebooks, create a notebook and then select one of the required kernels and add the following cells:

Cell 1

%load_ext sparkmagic.magics

Cell 2

%manage_spark

Next, click on the Create Session tab in the output widget of the above commands, and then paste the JSON below into the Properties input textbox.

Note

Add your credentials from credentials.properties in the placeholders of the spark.driver.extraJavaOptions property. Check the status of the Spark context, and verify the Livy session by going to {EMR_Master_Node_IP}:8998.

{
  "driverMemory": "2G",
  "executorMemory": "4G",
  "executorCores": 2,
  "conf": {
    "spark.driver.extraJavaOptions": "-Dcom.here.platform.analytics.ds.schema.olp-artifact-service.env.artifact-prod.url=https://artifact.api.platform.hereolp.cn/v1 -Dhere.platform.data-client.endpoint-locator.discovery-service-env=here-cn -Dhere.platform.data-client.request-signer.credentials.here-account.here-token-endpoint-url=<here.account.token.endpoint> -Dhere.platform.data-client.request-signer.credentials.here-account.here-client-id=<here.account.clientid> -Dhere.platform.data-client.request-signer.credentials.here-account.here-access-key-id=<here.access.key.id> -Dhere.platform.data-client.request-signer.credentials.here-account.here-access-key-secret=<here.access.key.secret>",
    "spark.executor.extraJavaOptions": "-Dcom.here.platform.analytics.ds.schema.olp-artifact-service.env.artifact-prod.url=https://artifact.api.platform.hereolp.cn/v1 -Dhere.platform.data-client.endpoint-locator.discovery-service-env=here-cn",
    "spark.scheduler.mode": "FAIR",
    "spark.executor.instances": 2,
    "spark.dynamicAllocation.enabled": "true",
    "spark.shuffle.service.enabled": "true",
    "spark.dynamicAllocation.executorIdleTimeout": "60s",
    "spark.dynamicAllocation.cachedExecutorIdleTimeout": "60s",
    "spark.dynamicAllocation.minExecutors": 2,
    "spark.dynamicAllocation.maxExecutors": 4,
    "spark.dynamicAllocation.initialExecutors": 2,
    "spark.jars.ivySettings": "/var/lib/spark/.here/ivy.settings.xml",
    "spark.driver.userClassPathFirst": "false",
    "spark.executor.userClassPathFirst": "false",
    "spark.jars.packages": "com.here.olp.util:mapquad:4.0.13,com.here.platform.location:location-compilation-core_2.11:0.20.184,com.here.platform.location:location-core_2.11:0.20.184,com.here.platform.location:location-inmemory_2.11:0.20.184,com.here.platform.location:location-integration-here-commons_2.11:0.20.184,com.here.platform.location:location-integration-optimized-map_2.11:0.20.184,com.here.platform.location:location-data-loader-standalone_2.11:0.20.184,com.here.platform.location:location-spark_2.11:0.20.184,com.here.platform.location:location-compilation-here-map-content_2.11:0.20.184,com.here.platform.location:location-examples-utils_2.11:0.4.115,com.here.schema.sdii:sdii_archive_v1_java:2.0.1,com.here.sdii:sdii_message_v3_java:4.0.1,com.here.sdii:sdii_message_list_v3_java:4.0.1,com.here.schema.rib:lane-attributes_v2_scala:2.33.0,com.here.schema.rib:road-traffic-pattern-attributes_v2_scala:2.33.0,com.here.schema.rib:advanced-navigation-attributes_v2_scala:2.33.0,com.here.schema.rib:cartography_v2_scala:2.33.0,com.here.schema.rib:adas-attributes_v2_scala:2.33.0,com.typesafe.akka:akka-actor_2.11:2.5.11,com.beachape:enumeratum_2.11:1.5.13,com.github.ben-manes.caffeine:caffeine:2.6.2,com.github.cb372:scalacache-caffeine_2.11:0.24.3,com.github.cb372:scalacache-core_2.11:0.24.3,com.github.os72:protoc-jar:3.6.0,com.google.protobuf:protobuf-java:3.6.1,com.iheart:ficus_2.11:1.4.3,com.typesafe:config:1.3.3,org.apache.logging.log4j:log4j-api-scala_2.11:11.0,org.typelevel:cats-core_2.11:1.4.0,org.typelevel:cats-kernel_2.11:1.4.0,org.apache.logging.log4j:log4j-api:2.8.2,com.here.platform.data.client:spark-support_2.11:0.5.30,com.here.platform.data.client:data-client_2.11:0.5.30,com.here.platform.data.client:client-core_2.11:0.5.30,com.here.platform.data.client:hrn_2.11:0.1.614,com.here.platform.data.client:data-engine_2.11:0.5.30,com.here.platform.data.client:blobstore-client_2.11:0.5.30,com.here.account:here-oauth-client:0.4.13,com.here.platform.analytics:spark-ds-connector-deps_2.11:0.6.15,com.here.platform.analytics:spark-ds-connector_2.11:0.6.15",
    "spark.jars.excludes": "com.google.protobuf:protobuf-java,com.here.*:*_proto,org.json4s:*,org.apache.spark:spark-core_2.11,org.apache.spark:spark-sql_2.11,org.apache.spark:spark-streaming_2.11,org.apache.spark:spark-launcher_2.11,org.apache.spark:spark-network-shuffle_2.11,org.apache.spark:spark-unsafe_2.11,org.apache.spark:spark-network-common_2.11,org.apache.spark:spark-tags_2.11,org.scala-lang:scala-library,org.scala-lang:scala-compiler,org.scala-lang.modules:scala-parser-combinators_2.11,org.scala-lang.modules:scala-java8-compat_2.11,org.scala-lang:scala-reflect,org.scala-lang:scalap,com.fasterxml.jackson.core:jackson-*"
  }
}

Thank you for choosing the HERE Data SDK for Java and Scala. After the setup, kindly consider filling out this short 1-minute survey to help us improve the setup experience.


results matching ""

    No results matching ""