Source code for rivus.utils.pandashp

""" pandashp: read/write shapefiles to/from special DataFrames

Offers two functions read_shp and write_shp that convert ESRI shapefiles to
pandas DataFrames that can be manipulated at will and then written back to
shapefiles. Opens up data manipulation capabilities beyond a simple GIS field
calculator.

Usage:
    import pandashp as pdshp
    # calculate population density from shapefile of cities (stupid, I know)
    cities = pdshp.read_shp('cities_germany_projected')
    cities['popdens'] = cities['population'] / cities['area']
    pdshp.write_shp(cities, 'cities_germany_projected_popdens')

"""

try:
    from itertools import izip as zip
except ImportError:  # zip is a builtin in Python 3.x
    pass
import numpy as np
import pandas as pd
import shapefile
from . import shapelytools
import warnings
from shapely.geometry import LineString, Point, Polygon


[docs]def read_shp(filename):
    """Read shapefile to dataframe w/ geometry.

    Args:
        filename: ESRI shapefile name to be read  (without .shp extension)

    Returns:
        pandas DataFrame with column geometry, containing individual shapely
        Geometry objects (i.e. Point, LineString, Polygon) depending on
        the shapefiles original shape type

    """
    sr = shapefile.Reader(filename)

    cols = sr.fields[:]  # [:] = duplicate field list
    if cols[0][0] == 'DeletionFlag':
        cols.pop(0)
    cols = [col[0] for col in cols]  # extract field name only
    cols.append('geometry')

    records = [row for row in sr.iterRecords()]

    if sr.shapeType == shapefile.POLYGON:
        geometries = [Polygon(shape.points)
                      if len(shape.points) > 2 else np.NaN  # invalid geometry
                      for shape in sr.iterShapes()]
    elif sr.shapeType == shapefile.POLYLINE:
        geometries = [LineString(shape.points) for shape in sr.iterShapes()]
    elif sr.shapeType == shapefile.POINT:
        geometries = [Point(*shape.points[0]) for shape in sr.iterShapes()]
    else:
        raise NotImplementedError

    data = [r + [g] for r, g in zip(records, geometries)]

    df = pd.DataFrame(data, columns=cols)
    df = df.convert_objects(convert_numeric=True)

    if np.NaN in geometries:
        # drop invalid geometries
        df = df.dropna(subset=['geometry'])
        num_skipped = len(geometries) - len(df)
        warnings.warn('Skipped {} invalid geometrie(s).'.format(num_skipped))
    return df


[docs]def write_shp(filename, dataframe, write_index=True):
    """Write dataframe w/ geometry to shapefile.

    Args:
        filename: ESRI shapefile name to be written (without .shp extension)
        dataframe: a pandas DataFrame with column geometry and homogenous
                   shape types (Point, LineString, or Polygon)
        write_index: add index as column to attribute tabel (default: true)

    Returns:
        Nothing.

    """

    df = dataframe.copy()
    if write_index:
        df.reset_index(inplace=True)

    # split geometry column from dataframe
    geometry = df.pop('geometry')

    # write geometries to shp/shx, according to geometry type
    if isinstance(geometry.iloc[0], Point):
        sw = shapefile.Writer(shapefile.POINT)
        for point in geometry:
            sw.point(point.x, point.y)

    elif isinstance(geometry.iloc[0], LineString):
        sw = shapefile.Writer(shapefile.POLYLINE)
        for line in geometry:
            sw.line([list(line.coords)])

    elif isinstance(geometry.iloc[0], Polygon):
        sw = shapefile.Writer(shapefile.POLYGON)
        for polygon in geometry:
            sw.poly([list(polygon.exterior.coords)])
    else:
        raise NotImplementedError

    # add fields for dbf
    for k, column in enumerate(df.columns):
        # unicode strings freak out pyshp, so remove u'..'
        column = str(column)

        if np.issubdtype(df.dtypes[k], np.number):
            # detect and convert integer-only columns
            if (df[column] % 1 == 0).all():
                df[column] = df[column].astype(np.integer)

            # now create the appropriate fieldtype
            if np.issubdtype(df.dtypes[k], np.floating):
                sw.field(column, 'N', decimal=5)
            else:
                sw.field(column, 'I', decimal=0)
        else:
            sw.field(column)

    # add records to dbf
    for record in df.itertuples():
        sw.record(*record[1:])  # drop first tuple element (=index)

    sw.save(filename)


[docs]def match_vertices_and_edges(vertices, edges, vertex_cols=('Vertex1', 'Vertex2')):
    """Adds unique IDs to vertices and corresponding edges.

    Identifies, which nodes coincide with the endpoints of edges and creates
    matching IDs for matching points, thus creating a node-edge graph whose
    edges are encoded purely by node ID pairs. The optional argument
    vertex_cols specifies which DataFrame columns of edges are added, default
    is 'Vertex1' and 'Vertex2'.

    Args:
        vertices: pandas DataFrame with geometry column of type Point
        edges: pandas DataFrame with geometry column of type LineString
        vertex_cols: tuple of 2 strings for the IDs numbers

    Returns:
        Nothing, the mathing IDs are added to the columns vertex_cols in
        argument edges
    """

    vertex_indices = []
    for e, line in enumerate(edges.geometry):
        edge_endpoints = []
        for k, vertex in enumerate(vertices.geometry):
            #  change start -------
            # if line.touches(vertex) or line.intersects(vertex):
            #     edge_endpoints.append(vertices.index[k])
            if vertex.buffer(0.001).intersects(line) or line.intersects(vertex):
                edge_endpoints.append(vertices.index[k])
            #  change stop -------

        if len(edge_endpoints) == 0:
            warnings.warn("edge " + str(e) +
                          " has no endpoints: " + str(edge_endpoints))
        elif len(edge_endpoints) == 1:
            warnings.warn("edge " + str(e) +
                          " has only 1 endpoint: " + str(edge_endpoints))

        vertex_indices.append(edge_endpoints)

    edges[vertex_cols[0]] = pd.Series([min(n1n2) for n1n2 in vertex_indices],
                                      index=edges.index)
    edges[vertex_cols[1]] = pd.Series([max(n1n2) for n1n2 in vertex_indices],
                                      index=edges.index)


[docs]def find_closest_edge(polygons, edges, to_attr='index', column='nearest'):
    """Find closest edge for centroid of polygons.

    Args:
        polygons: a pandas DataFrame with geometry column of Polygons
        edges: a pandas DataFrame with geometry column of LineStrings
        to_attr: a column name in DataFrame edges (default: index)
        column: a column name to be added/overwrite in DataFrame polygons with
                the value of column to_attr from the nearest edge in edges

    Returns:
        a list of LineStrings connecting polygons' centroids with the nearest
        point in in edges. Side effect: polygons recieves new column with the
        attribute value of nearest edge. Warning: if column exists, it is
        overwritten.
    """

    connecting_lines = []
    nearest_indices = []
    centroids = [b.centroid for b in polygons['geometry']]

    for centroid in centroids:
        nearest_edge, _, nearest_index = shapelytools.closest_object(
            edges['geometry'], centroid)
        nearest_point = shapelytools.project_point_to_object(
            centroid, nearest_edge)

        connecting_lines.append(LineString(tuple(centroid.coords) +
                                           tuple(nearest_point.coords)))

        nearest_indices.append(edges[to_attr][nearest_index])

    polygons[column] = pd.Series(nearest_indices, index=polygons.index)

    return pd.DataFrame({'geometry': connecting_lines})


[docs]def bounds(df):
    """Return a DataFrame of minx, miny, maxx, maxy of each geometry."""
    bounds = np.array([geom.bounds for geom in df.geometry])
    return pd.DataFrame(bounds,
                        columns=['minx', 'miny', 'maxx', 'maxy'],
                        index=df.index)


[docs]def total_bounds(df):
    """Return bounding box (minx, miny, maxx, maxy) of all geometries. """
    b = bounds(df)
    return (b['minx'].min(),
            b['miny'].min(),
            b['maxx'].max(),
            b['maxy'].max())