Flatten JSON string using explode
Using PySpark to Read and Flatten JSON data with an enforced schema – Ben Alex Keen
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, ArrayType

# Enforced schema for the sensor JSON payload. Every leaf field is declared
# as a nullable string; the gyroscope readings live in a nested struct.
sensor_schema = StructType(fields=[
    StructField('accelerometer_x', StringType(), True),
    StructField('accelerometer_y', StringType(), True),
    StructField('accelerometer_z', StringType(), True),
    StructField(
        'gyroscope',
        StructType(fields=[
            StructField('gyroscope_x', StringType(), True),
            StructField('gyroscope_y', StringType(), True),
            StructField('gyroscope_z', StringType(), True),
        ]),
    ),
])

# Parse the `body` attribute of each row of `df` as JSON using the schema above.
# NOTE(review): `spark` and `df` are defined outside this snippet; this assumes
# `df.body` holds one JSON document per row as a string -- confirm upstream.
# The read has to come AFTER `sensor_schema` is defined: an identical read that
# previously sat above the schema definition raised NameError and has been
# removed (its result was never used before being rebound).
data_df2 = spark.read.json(df.rdd.map(lambda row: row.body), schema=sensor_schema)
# `col` is used below but was not imported anywhere in this snippet.
from pyspark.sql.functions import col

# Inspect the parsed (still nested) frame and its schema.
# NOTE(review): `display` is presumably the notebook-provided helper
# (e.g. Databricks); it is not defined in this snippet.
display(data_df2)
data_df2.printSchema()

# Flatten: keep the top-level accelerometer columns and pull the three
# gyroscope readings out of the nested `gyroscope` struct via dotted paths.
data_df1 = data_df2.select(
    col('accelerometer_x'),
    col('accelerometer_y'),
    col('accelerometer_z'),
    col('gyroscope.gyroscope_x'),
    col('gyroscope.gyroscope_y'),
    col('gyroscope.gyroscope_z'),
)
display(data_df1)
Comments
Post a Comment