"""
Original Classes written by Linwood Creekmore III (modified for geo_utils)
With code blocks from:
- http://programmingadvent.blogspot.com/2013/06/kmzkml-file-parsing-with-python.html
- http://gis.stackexchange.com/questions/159681/geopandas-cant-save-geojson
- https://gist.github.com/mciantyre/32ff2c2d5cd9515c1ee7
"""
from .geoconfig import *
import ast
import xml.sax
import xml.sax.handler
from html.parser import HTMLParser
class ModHTMLParser(HTMLParser):
    """A child of ``HTMLParser``, tailored (modified) for kml/kmz parsing.

    Once a ``<table>`` start tag has been seen, every text chunk of the form
    ``key:value`` (exactly one colon) is stored in ``mapping`` and mirrored
    in ``series``.

    Attributes:
        in_table (bool): ``True`` after the first ``<table>`` start tag.
        mapping (dict): Collected ``key -> value`` strings.
        buffer: Colon-split parts of the most recent in-table text chunk
            (an empty string until the first chunk arrives).
        name_tag (str): Initialised to ``""``; not used by this class.
        series (pd.Series): ``mapping`` converted into a pandas Series.
    """

    def __init__(self):
        super().__init__()
        self.in_table = False
        self.mapping = {}
        self.buffer = ""
        self.name_tag = ""
        # Explicit dtype: an empty Series without dtype triggers a
        # DeprecationWarning on pandas 1.x.
        self.series = pd.Series(dtype=object)

    def handle_starttag(self, tag, attrs):
        """Enables data collection when a table tag is encountered.

        Args:
            tag (str): Name of the start tag; only ``"table"`` has an effect.
            attrs (list): List of additional attributes (currently unused).
        """
        # NOTE: there is no end-tag handler, so the flag stays set for the
        # remainder of the fed document once a table has been opened.
        if tag == "table":
            self.in_table = True

    def handle_data(self, data):
        """Stores ``key:value`` text in ``mapping`` if ``in_table`` is ``True``.

        Text that does not split into exactly two parts on ``":"`` is ignored.
        Keys and values are stored as split, i.e. inner whitespace around the
        colon is kept.

        Args:
            data (str): Text chunk, expected as ``key:value``.
        """
        if not self.in_table:
            return
        self.buffer = data.strip(" \n\t").split(":")
        if len(self.buffer) == 2:
            key, value = self.buffer
            self.mapping[key] = value
            # Rebuild the Series only when the mapping actually changed.
            self.series = pd.Series(self.mapping)
class PlacemarkHandler(xml.sax.handler.ContentHandler):
    """Child of ``xml.sax.handler.ContentHandler``, tailored for handling kml files.

    While parsing, ``mapping`` is filled with one dictionary per Placemark,
    keyed by the Placemark's stripped ``<name>`` text. Each dictionary maps
    the tag names of elements closed after the ``<name>`` element to their
    text content.

    Attributes:
        inName (bool): ``True`` while inside a ``<name>`` element of a Placemark.
        inPlacemark (bool): ``True`` while inside a ``<Placemark>`` element.
        mapping (dict): ``{placemark name: {tag name: text}}``.
        buffer (str): Character data collected since the last closed element.
        name_tag (str): Name of the Placemark currently being read.
    """

    def __init__(self):
        super().__init__()
        self.inName = False  # handle XML parser events
        self.inPlacemark = False
        self.mapping = {}
        self.buffer = ""
        self.name_tag = ""

    def startElement(self, name, attributes):
        """Tracks entering a Placemark element and its name element.

        Args:
            name (str): Name-tag of the element
            attributes: Attributes object passed by the SAX parser (unused).
        """
        if name == "Placemark":
            self.inPlacemark = True
            self.buffer = ""
        if self.inPlacemark and name == "name":
            # save name text to follow
            self.inName = True

    def characters(self, data):
        """Adds a chunk of character data to the read-buffer.

        Data outside of a Placemark is ignored.

        Args:
            data (str): Character data reported by the SAX parser.
        """
        if self.inPlacemark:
            self.buffer += data

    def endElement(self, name):
        """Stores the buffered text of the element that just closed.

        * ``Placemark``: leaves the current placemark and clears its name.
        * ``name`` (inside a Placemark): starts a new, empty entry in
          ``mapping`` keyed by the stripped buffer text.
        * any other element inside a named Placemark: stores the buffer under
          the element's tag name, appending if the tag occurred before.

        The buffer is reset afterwards in every case.

        Args:
            name (str): Name-tag of the closing element.
        """
        self.buffer = self.buffer.strip("\n\t")
        if name == "Placemark":
            # clear the current placemark and name
            self.inPlacemark = False
            self.name_tag = ""
        elif name == "name" and self.inPlacemark:
            # on end title tag
            self.inName = False
            self.name_tag = self.buffer.strip()
            self.mapping[self.name_tag] = {}
        elif self.inPlacemark and self.name_tag:
            entry = self.mapping.get(self.name_tag)
            if entry is not None:
                entry[name] = entry.get(name, "") + self.buffer
        self.buffer = ""

    @staticmethod
    def spatializer(row):
        """Converts string objects to spatial Python objects.

        Conversion is attempted in this order:

        1. ``row["coordinates"]`` as space-separated coordinate tuples
           forming a polygon.
        2. ``row["coordinates"]`` as one coordinate tuple forming a point
           (the last tuple item is returned as ``altitude``).
        3. ``row["longitude"]`` and ``row["latitude"]`` forming a point.

        Args:
            row: Record supporting ``row[key]`` access (e.g. a row handed
                over by ``DataFrame.apply(..., axis=1)``).

        Returns:
            pd.Series: With a ``geometry`` entry (and ``altitude`` for case
            2), or ``None`` if no conversion was possible.
        """
        try:
            # check if the coordinates column exists and holds a string
            data = row["coordinates"].strip(" \t\n\r")
        except (KeyError, AttributeError):
            # No usable coordinates: skip straight to latitude/longitude
            # instead of failing on an unbound variable.
            data = None

        if data is not None:
            try:
                vertices = [
                    ast.literal_eval(token) for token in data.strip().split(" ")
                ]
                spatial = Polygon(LineString(vertices))
                return pd.Series({"geometry": spatial})
            except Exception:
                # best effort: not a polygon, try a single point next
                pass
            try:
                g = ast.literal_eval(data)
                return pd.Series({"geometry": Point(g[:2]),
                                  "altitude": g[-1]})
            except Exception:
                # best effort: fall through to latitude/longitude
                pass

        try:
            # check if there are latitude and longitude columns
            longitude = float(row["longitude"])
            latitude = float(row["latitude"])
        except KeyError:
            return None
        return pd.Series({"geometry": Point(longitude, latitude)})

    @staticmethod
    def htmlizer(row):
        """Parses the html ``description`` of a row into a pandas Series.

        Args:
            row: Record supporting ``row["description"]`` access; the value
                is fed to a ``ModHTMLParser``.

        Returns:
            pd.Series: The ``series`` attribute of the parser after feeding.
        """
        htmlparser = ModHTMLParser()
        htmlparser.feed(row["description"])
        return htmlparser.series