diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7435e01 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +* text eol=lf +shapefiles/**/* binary diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..ad56587 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,13 @@ +# These are supported funding model platforms + +github: [geospatialpython] +patreon: # Replace with a single Patreon username +open_collective: # Replace with a single Open Collective username +ko_fi: # Replace with a single Ko-fi username +tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +liberapay: # Replace with a single Liberapay username +issuehunt: # Replace with a single IssueHunt username +otechie: # Replace with a single Otechie username +lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry +custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml new file mode 100644 index 0000000..aa7e47b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -0,0 +1,47 @@ +name: Bug Report +description: Something in PyShp crashed and raised an exception. +title: "Title goes here..." +labels: ["bug"] +body: + - type: input + id: pyshp-version + attributes: + label: PyShp Version + description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. + placeholder: ... + validations: + required: true + - type: input + id: python-version + attributes: + label: Python Version + description: Please input the version of the Python executable. + placeholder: ... 
+ validations: + required: true + - type: textarea + id: your-code + attributes: + label: Your code + description: Please copy-paste the relevant parts of your code or script that triggered the error. + placeholder: ... + render: shell + validations: + required: true + - type: textarea + id: stacktrace + attributes: + label: Full stacktrace + description: Please copy-paste the full stack trace of the exception that was raised. + placeholder: ... + render: shell + validations: + required: true + - type: textarea + id: notes + attributes: + label: Other notes + description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? + placeholder: ... + validations: + required: false \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..a49eab2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: true \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/newfeature.yml b/.github/ISSUE_TEMPLATE/newfeature.yml new file mode 100644 index 0000000..afb043a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/newfeature.yml @@ -0,0 +1,21 @@ +name: Feature Request +description: You would like to request a new feature. +title: "Title goes here..." +labels: ["enhancement"] +body: + - type: textarea + id: functionality + attributes: + label: Describe the feature request + description: Please describe the functionality you would like added to PyShp. + placeholder: ... + validations: + required: true + - type: checkboxes + id: contribute + attributes: + label: Contributions + description: Would you be interested in contributing code that adds this functionality through a Pull Request? We gladly accept PRs - it's much faster and you'll be added as a contributor. + options: + - label: I am interested in implementing the described feature request and submitting it as a PR. 
+ required: false \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml new file mode 100644 index 0000000..d8c0cd0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -0,0 +1,13 @@ +name: Question +description: You have a question about PyShp or how to use it. +title: "Title goes here..." +labels: ["question"] +body: + - type: textarea + id: question + attributes: + label: What's your question? + description: Please describe what you would like to know about PyShp, e.g. how to do something. + placeholder: ... + validations: + required: true \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/unexpected.yml b/.github/ISSUE_TEMPLATE/unexpected.yml new file mode 100644 index 0000000..bf0a577 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/unexpected.yml @@ -0,0 +1,54 @@ +name: Unexpected Behavior +description: You think PyShp might be doing something wrong. +title: "Title goes here..." +labels: ["bug"] +body: + - type: input + id: pyshp-version + attributes: + label: PyShp Version + description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. + placeholder: ... + validations: + required: true + - type: input + id: python-version + attributes: + label: Python Version + description: Please input the version of the Python executable. + placeholder: ... + validations: + required: true + - type: textarea + id: your-code + attributes: + label: Your code + description: Please copy-paste the relevant parts of your code or script that you tried to run. + placeholder: ... + render: shell + validations: + required: true + - type: textarea + id: expected-results + attributes: + label: Expected results + description: Please describe what you expected to see in the output. + placeholder: ... + validations: + required: true + - type: textarea + id: actual-results + attributes: + label: Actual results + description: Please describe what you actually saw in the output. 
+ placeholder: ... + validations: + required: true + - type: textarea + id: notes + attributes: + label: Other notes + description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? + placeholder: ... + validations: + required: false \ No newline at end of file diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml new file mode 100644 index 0000000..86ec93f --- /dev/null +++ b/.github/actions/test/action.yml @@ -0,0 +1,33 @@ +name: + Test + +description: + Run pytest, and run the doctest runner (shapefile.py as a script). + +runs: + using: "composite" + steps: + # The Repo is required to already be checked out, e.g. by the calling workflow + + # The Python to be tested with is required to already be setup, with "python" and "pip" on the system Path + + - name: Doctests + shell: bash + run: python shapefile.py + + - name: Install test dependencies. + shell: bash + run: | + python -m pip install --upgrade pip + pip install -r requirements.test.txt + + - name: Pytest + shell: bash + run: | + pytest + + - name: Show versions for logs. + shell: bash + run: | + python --version + python -m pytest --version \ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..9a1fa30 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,60 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
+ +name: deploy + +on: + release: + types: [published] + +jobs: + test: + + # In general, tests should be run after building a distribution, to test that distribution. + # However as long as PyShp is a pure Python library, with pure Python deps (or no deps) + # then this would only test the packaging process, not so much the code as there are + # no binaries. + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Run tests and hooks + uses: ./.github/workflows/run_tests_and_hooks.yml + + deploy: + # Prevent deployment of releases that fail any hooks (e.g. linting) or that fail any tests. + needs: test + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + + - name: Publish package + if: github.repository == 'GeospatialPython/pyshp' + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_INTEGRATION }} diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml new file mode 100644 index 0000000..4c393a3 --- /dev/null +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -0,0 +1,88 @@ +# This workflow will run the pre-commit hooks (including linters), and the tests with a variety of Python versions + +name: Run pre-commit hooks and tests + +on: + push: + pull_request: + branches: [ master ] + workflow_call: + workflow_dispatch: + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - uses: pre-commit/action@v3.0.1 + + pylint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: 
actions/setup-python@v5 + - name: install Pylint and plugin + run: | + python -m pip install --upgrade pip + pip install pytest pylint pylint-per-file-ignores + - name: run Pylint for errors and warnings only, on test_shapefile.py + run: | + pylint --disable=R,C test_shapefile.py + + test_on_old_Pythons: + strategy: + fail-fast: false + matrix: + python-version: [ + "2.7", + "3.5", + "3.6", + "3.7", + "3.8", + ] + + runs-on: ubuntu-latest + container: + image: python:${{ matrix.python-version }}-slim + + steps: + - uses: actions/checkout@v4 + + - name: Run tests + uses: ./.github/actions/test + + + run_tests: + strategy: + fail-fast: false + matrix: + python-version: [ + "3.9", + "3.10", + "3.11", + "3.12", + "3.13.0-rc.2", + ] + os: [ + "macos-latest", + "ubuntu-latest", + "ubuntu-24.04", + "windows-latest", + ] + include: + - os: ubuntu-24.04 + python-version: "3.14.0-alpha.0" + - os: ubuntu-22.04 + python-version: "3.14.0-alpha.0" + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Run tests + uses: ./.github/actions/test \ No newline at end of file diff --git a/.gitignore b/.gitignore index 8d5646b..f6d6332 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,11 @@ shapefiles/test/latin_as_utf8.shx shapefiles/test/null.dbf shapefiles/test/null.shp shapefiles/test/null.shx +__pycache__/ +__cache__/ +build/ +dist/ +*.egg-info/ +*.py[cod] +.vscode +.dmypy.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..f065f59 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,15 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: check-yaml + - id: trailing-whitespace +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.4 + hooks: + - id: 
ruff-format diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index e616be1..0000000 --- a/.travis.yml +++ /dev/null @@ -1,25 +0,0 @@ -language: python - -python: - - '2.7' - - '3.3' - - '3.4' - - '3.5' - -install: - - pip install . - -script: - - python shapefile.py - -deploy: - provider: pypi - edge: - branch: v1.8.45 - user: jlawhead - password: - secure: CI6igPEwYgPU0ZT4TfMsJ8D9HBfiGiEsPO01NHjx9kS1y6YdVedklnCGFn0J6qX7qHX/T995tS1OPU2izWVKkIIEOm9MbLzR4SSRqrHNOZAkXWClBkwIcoHEzQEnc5akRYBeisCqZtKn7w5EP1yquExHz+l2SKjwdS4HibA7wTA7g3tbX8oXIiFIANBy2vS18bmkxLCdV0lKdf8MU2TjavEghPL4vURyU31gMs5BaWHTsJH+2sLIzdBzYwkTeI+CKOmCr+jmLwHVMp0R43PfemSW5gK3BnLhrClfF4CU+Fu8Lypb6Glyo9kY6vsCf2d/5qzojx5j/1/rVeha7gJ35VodKnXZjgl+TwtUHiZL/MC/nqhPDx9ygtYZBjaVfgeWDAvqE0T+8KH1WhCwPDqFdjIuAmKqa5nJhWNueXNtmmDK9Bo0eUFgdHaThBbyPbRpqj5Pt9S82FIzOoWoxdy7Hv5D5xCmv4knjIcy9yDHj6KyTLDCHa4yH6aFJvYHj2Ml5nrLT/g08SyPl5kasZDnSIg1QI8A2GIJpjNbdqgCUwrpz+jZBGaAj/dq4UEfjNcjiaAQfBSdf2Suoz6z4oSLhXR9sC1+HgKG6OKHqt3hlq9hTUe8Tuj3DZ681eS00oXCPa5E5P4gDCoXUga9coZeGPOhbPBVIAQnxNRVaj6fqqU= - on: - tags: true - distributions: sdist bdist_wheel - repo: GeospatialPython/pyshp diff --git a/LICENSE.TXT b/LICENSE.TXT index b7d7276..d2b7446 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -1,9 +1,9 @@ -The MIT License (MIT) - -Copyright © 2013 Joel Lawhead - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +The MIT License (MIT) + +Copyright © 2013 Joel Lawhead + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index 815dd16..2a211cc 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,65 @@ # PyShp -The Python Shapefile Library (pyshp) reads and writes ESRI Shapefiles in pure Python. +The Python Shapefile Library (PyShp) reads and writes ESRI Shapefiles in pure Python. 
![pyshp logo](http://4.bp.blogspot.com/_SBi37QEsCvg/TPQuOhlHQxI/AAAAAAAAAE0/QjFlWfMx0tQ/S350/GSP_Logo.png "PyShp") -[![Build Status](https://travis-ci.org/GeospatialPython/pyshp.svg?branch=master)](https://travis-ci.org/GeospatialPython/pyshp) +![build status](https://github.com/GeospatialPython/pyshp/actions/workflows/run_tests_hooks_and_tools.yml/badge.svg) + +- **Author**: [Joel Lawhead](https://github.com/GeospatialPython) +- **Maintainers**: [Karim Bahgat](https://github.com/karimbahgat) +- **Version**: 2.3.1 +- **Date**: 28 July, 2022 +- **License**: [MIT](https://github.com/GeospatialPython/pyshp/blob/master/LICENSE.TXT) ## Contents -[Overview](#overview) - -[Version Changes](#version-changes) - -[Examples](#examples) -- [Reading Shapefiles](#reading-shapefiles) - - [Reading Shapefiles Using the Context Manager](#reading-shapefiles-using-the-context-manager) - - [Reading Shapefiles from File-Like Objects](#reading-shapefiles-from-file-like-objects) - - [Reading Shapefile Meta-Data](#reading-shapefile-meta-data) - - [Reading Geometry](#reading-geometry) - - [Reading Records](#reading-records) - - [Reading Geometry and Records Simultaneously](#reading-geometry-and-records-simultaneously) -- [Writing Shapefiles](#writing-shapefiles) - - [Writing Shapefiles Using the Context Manager](#writing-shapefiles-using-the-context-manager) - - [Writing Shapefiles to File-Like Objects](#writing-shapefiles-to-file-like-objects) - - [Setting the Shape Type](#setting-the-shape-type) - - [Adding Records](#adding-records) - - [Adding Geometry](#adding-geometry) - - [Geometry and Record Balancing](#geometry-and-record-balancing) - -[How To's](#how-tos) -- [3D and Other Geometry Types](#3d-and-other-geometry-types) -- [Working with Large Shapefiles](#working-with-large-shapefiles) -- [Unicode and Shapefile Encodings](#unicode-and-shapefile-encodings) - -[Testing](#testing) +- [Overview](#overview) +- [Version Changes](#version-changes) +- [The Basics](#the-basics) + - [Reading 
Shapefiles](#reading-shapefiles) + - [The Reader Class](#the-reader-class) + - [Reading Shapefiles from Local Files](#reading-shapefiles-from-local-files) + - [Reading Shapefiles from Zip Files](#reading-shapefiles-from-zip-files) + - [Reading Shapefiles from URLs](#reading-shapefiles-from-urls) + - [Reading Shapefiles from File-Like Objects](#reading-shapefiles-from-file-like-objects) + - [Reading Shapefiles Using the Context Manager](#reading-shapefiles-using-the-context-manager) + - [Reading Shapefile Meta-Data](#reading-shapefile-meta-data) + - [Reading Geometry](#reading-geometry) + - [Reading Records](#reading-records) + - [Reading Geometry and Records Simultaneously](#reading-geometry-and-records-simultaneously) + - [Writing Shapefiles](#writing-shapefiles) + - [The Writer Class](#the-writer-class) + - [Writing Shapefiles to Local Files](#writing-shapefiles-to-local-files) + - [Writing Shapefiles to File-Like Objects](#writing-shapefiles-to-file-like-objects) + - [Writing Shapefiles Using the Context Manager](#writing-shapefiles-using-the-context-manager) + - [Setting the Shape Type](#setting-the-shape-type) + - [Adding Records](#adding-records) + - [Adding Geometry](#adding-geometry) + - [Geometry and Record Balancing](#geometry-and-record-balancing) +- [Advanced Use](#advanced-use) + - [Common Errors and Fixes](#common-errors-and-fixes) + - [Warnings and Logging](#warnings-and-logging) + - [Shapefile Encoding Errors](#shapefile-encoding-errors) + - [Reading Large Shapefiles](#reading-large-shapefiles) + - [Iterating through a shapefile](#iterating-through-a-shapefile) + - [Limiting which fields to read](#limiting-which-fields-to-read) + - [Attribute filtering](#attribute-filtering) + - [Spatial filtering](#spatial-filtering) + - [Writing large shapefiles](#writing-large-shapefiles) + - [Merging multiple shapefiles](#merging-multiple-shapefiles) + - [Editing shapefiles](#editing-shapefiles) + - [3D and Other Geometry Types](#3d-and-other-geometry-types) + - 
[Shapefiles with measurement (M) values](#shapefiles-with-measurement-m-values) + - [Shapefiles with elevation (Z) values](#shapefiles-with-elevation-z-values) + - [3D MultiPatch Shapefiles](#3d-multipatch-shapefiles) +- [Testing](#testing) +- [Contributors](#contributors) # Overview -The Python Shapefile Library (pyshp) provides read and write support for the +The Python Shapefile Library (PyShp) provides read and write support for the Esri Shapefile format. The Shapefile format is a popular Geographic Information System vector data format created by Esri. For more information about this format please read the well-written "ESRI Shapefile Technical @@ -55,14 +76,14 @@ despite the numerous ways to store and exchange GIS data available today. Pyshp is compatible with Python 2.7-3.x. -This document provides examples for using pyshp to read and write shapefiles. However +This document provides examples for using PyShp to read and write shapefiles. However many more examples are continually added to the blog [http://GeospatialPython.com](http://GeospatialPython.com), -and by searching for pyshp on [https://gis.stackexchange.com](https://gis.stackexchange.com). +and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com). Currently the sample census blockgroup shapefile referenced in the examples is available on the GitHub project site at [https://github.com/GeospatialPython/pyshp](https://github.com/GeospatialPython/pyshp). These examples are straight-forward and you can also easily run them against your -own shapefiles with minimal modification. +own shapefiles with minimal modification. Important: If you are new to GIS you should read about map projections. Please visit: [https://github.com/GeospatialPython/pyshp/wiki/Map-Projections](https://github.com/GeospatialPython/pyshp/wiki/Map-Projections) @@ -74,47 +95,160 @@ part of your geospatial project. 
# Version Changes +## 2.3.x + +### New Features: +- Reader.iterRecords now allows start and stop to be specified, to look up smaller ranges of records. +- Equality comparisons between Records now also require the fields to be the same (and in the same order). + +### Development: +- Code quality tools run on PyShp + +## 2.3.1 + +### Bug fixes: + +- Fix recently introduced issue where Reader/Writer closes file-like objects provided by user (#244) + +## 2.3.0 + +### New Features: + +- Added support for pathlib and path-like shapefile filepaths (@mwtoews). +- Allow reading individual file extensions via filepaths. + +### Improvements: + +- Simplified setup and deployment (@mwtoews) +- Faster shape access when missing shx file +- Switch to named logger (see #240) + +### Bug fixes: + +- More robust handling of corrupt shapefiles (fixes #235) +- Fix errors when writing to individual file-handles (fixes #237) +- Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) +- Fix test issues in environments without network access (@sebastic, @musicinmybrain). + +## 2.2.0 + +### New Features: + +- Read shapefiles directly from zipfiles. +- Read shapefiles directly from urls. +- Allow fast extraction of only a subset of dbf fields through a `fields` arg. +- Allow fast filtering which shapes to read from the file through a `bbox` arg. + +### Improvements: + +- More examples and restructuring of README. +- More informative Shape to geojson warnings (see #219). +- Add shapefile.VERBOSE flag to control warnings verbosity (default True). +- Shape object information when calling repr(). +- Faster ring orientation checks, enforce geojson output ring orientation. + +### Bug fixes: + +- Remove null-padding at end of some record character fields. +- Fix dbf writing error when the number of record list or dict entries didn't match the number of fields. 
+- Handle rare garbage collection issue after deepcopy (https://github.com/mattijn/topojson/issues/120) +- Fix bug where records and shapes would be assigned incorrect record number (@karanrn) +- Fix typos in docs (@timgates) + +## 2.1.3 + +### Bug fixes: + +- Fix recent bug in geojson hole-in-polygon checking (see #205) +- Misc fixes to allow geo interface dump to json (eg dates as strings) +- Handle additional dbf date null values, and return faulty dates as unicode (see #187) +- Add writer target typecheck +- Fix bugs to allow reading shp/shx/dbf separately +- Allow delayed shapefile loading by passing no args +- Fix error with writing empty z/m shapefile (@mcuprjak) +- Fix signed_area() so ignores z/m coords +- Enforce writing the 11th field name character as null-terminator (only first 10 are used) +- Minor README fixes +- Added more tests + +## 2.1.2 + +### Bug fixes: + +- Fix issue where warnings.simplefilter('always') changes global warning behavior [see #203] + +## 2.1.1 + +### Improvements: + +- Handle shapes with no coords and represent as geojson with no coords (GeoJSON null-equivalent) +- Expand testing to Python 3.6, 3.7, 3.8 and PyPy; drop 3.3 and 3.4 [@mwtoews] +- Added pytest testing [@jmoujaes] + +### Bug fixes: + +- Fix incorrect geo interface handling of multipolygons with complex exterior-hole relations [see #202] +- Enforce shapefile requirement of at least one field, to avoid writing invalid shapefiles [@Jonty] +- Fix Reader geo interface including DeletionFlag field in feature properties [@nnseva] +- Fix polygons not being auto closed, which was accidentally dropped +- Fix error for null geometries in feature geojson +- Misc docstring cleanup [@fiveham] + +## 2.1.0 + +### New Features: + +- Added back read/write support for unicode field names. 
+- Improved Record representation +- More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() + +### Bug fixes: + +- Fixed error when reading optional m-values +- Fixed Record attribute autocomplete in Python 3 +- Misc readme cleanup + ## 2.0.0 -The newest version of PyShp, version 2.0 introduced some major new improvements. +The newest version of PyShp, version 2.0 introduced some major new improvements. A great thanks to all who have contributed code and raised issues, and for everyone's -patience and understanding during the transition period. -Some of the new changes are incompatible with previous versions. +patience and understanding during the transition period. +Some of the new changes are incompatible with previous versions. Users of the previous version 1.x should therefore take note of the following changes -(Note: Some contributor attributions may be missing): +(Note: Some contributor attributions may be missing): ### Major Changes: -- Full support for unicode text, with custom encoding, and exception handling. - - Means that the Reader returns unicode, and the Writer accepts unicode. -- PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. +- Full support for unicode text, with custom encoding, and exception handling. + - Means that the Reader returns unicode, and the Writer accepts unicode. +- PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. - Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: - - Specify filepath/destination and text encoding when creating the Writer. - - The file is written incrementally with each call to shape/record. - - Adding shapes is now done using dedicated methods for each shapetype. + - Specify filepath/destination and text encoding when creating the Writer. + - The file is written incrementally with each call to shape/record. 
+ - Adding shapes is now done using dedicated methods for each shapetype. - Reading shapefiles is now more convenient: - - Shapefiles can be opened using the context manager, and files are properly closed. - - Shapefiles can be iterated, have a length, and supports the geo interface. - - New ways of inspecing shapefile metadata by printing. [@megies] + - Shapefiles can be opened using the context manager, and files are properly closed. + - Shapefiles can be iterated, have a length, and support the geo interface. + - New ways of inspecting shapefile metadata by printing. [@megies] - More convenient accessing of Record values as attributes. [@philippkraft] - - More convenient shape type name checking. [@megies] -- Add more support and documentation for MultiPatch 3D shapes. -- The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. -- Better documentation of previously unclear aspects, such as field types. + - More convenient shape type name checking. [@megies] +- Add more support and documentation for MultiPatch 3D shapes. +- The Reader "elevation" and "measure" attributes are now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. +- Better documentation of previously unclear aspects, such as field types. ### Important Fixes: - More reliable/robust: - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. [@ShinNoNoir] - - Improved parsing of field value types, fixed errors and made more flexible. + - Improved parsing of field value types, fixed errors and made more flexible. - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] - Fix some geo interface errors, including checking polygon directions. 
- Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] - Enforce maximum field limit. [@mwtoews] -# Examples +# The Basics Before doing anything you must import the library. @@ -123,10 +257,14 @@ Before doing anything you must import the library. The examples below will use a shapefile created from the U.S. Census Bureau Blockgroups data set near San Francisco, CA and available in the git -repository of the pyshp GitHub site. +repository of the PyShp GitHub site. ## Reading Shapefiles +### The Reader Class + +#### Reading Shapefiles from Local Files + To read a shapefile create a new "Reader" object and pass it the name of an existing shapefile. The shapefile format is actually a collection of three files. You specify the base filename of the shapefile or the complete filename @@ -146,25 +284,42 @@ OR >>> sf = shapefile.Reader("shapefiles/blockgroups.dbf") OR any of the other 5+ formats which are potentially part of a shapefile. The -library does not care about file extensions. +library does not care about file extensions. You can also specify that you only +want to read some of the file extensions through the use of keyword arguments: -### Reading Shapefiles Using the Context Manager -The "Reader" class can be used as a context manager, to ensure open file -objects are properly closed when done reading the data: + >>> sf = shapefile.Reader(dbf="shapefiles/blockgroups.dbf") - >>> with shapefile.Reader("shapefiles/blockgroups.shp") as shp: - ... 
print(shp) - shapefile Reader - 663 shapes (type 'POLYGON') - 663 records (44 fields) +#### Reading Shapefiles from Zip Files + +If your shapefile is wrapped inside a zip file, the library is able to handle that too, meaning you don't have to worry about unzipping the contents: + + + >>> sf = shapefile.Reader("shapefiles/blockgroups.zip") + +If the zip file contains multiple shapefiles, just specify which shapefile to read by additionally specifying the relative path after the ".zip" part: -### Reading Shapefiles from File-Like Objects + + >>> sf = shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp") + +#### Reading Shapefiles from URLs + +Finally, you can use all of the above methods to read shapefiles directly from the internet, by giving a url instead of a local path, e.g.: + + + >>> # from a zipped shapefile on website + >>> sf = shapefile.Reader("https://github.com/JamesParrott/PyShp_test_shapefile/raw/main/gis_osm_natural_a_free_1.zip") + + >>> # from a shapefile collection of files in a github repository + >>> sf = shapefile.Reader("https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries.shp?raw=true") + +This will automatically download the file(s) to a temporary location before reading, saving you a lot of time and repetitive boilerplate code when you just want quick access to some external data. + +#### Reading Shapefiles from File-Like Objects You can also load shapefiles from any Python file-like object using keyword arguments to specify any of the three files. This feature is very powerful and -allows you to load shapefiles from a url, from a zip file, serialized object, -or in some cases a database. +allows you to custom load shapefiles from arbitrary storage formats, such as a protected url or zip file, a serialized object, or in some cases a database. >>> myshp = open("shapefiles/blockgroups.shp", "rb") @@ -172,24 +327,36 @@ or in some cases a database. 
>>> r = shapefile.Reader(shp=myshp, dbf=mydbf) Notice in the examples above the shx file is never used. The shx file is a -very simple fixed-record index for the variable length records in the shp -file. This file is optional for reading. If it's available pyshp will use the +very simple fixed-record index for the variable-length records in the shp +file. This file is optional for reading. If it's available PyShp will use the shx file to access shape records a little faster but will do just fine without it. -### Reading Shapefile Meta-Data +#### Reading Shapefiles Using the Context Manager + +The "Reader" class can be used as a context manager, to ensure open file +objects are properly closed when done reading the data: + + >>> with shapefile.Reader("shapefiles/blockgroups.shp") as shp: + ... print(shp) + shapefile Reader + 663 shapes (type 'POLYGON') + 663 records (44 fields) + +#### Reading Shapefile Meta-Data Shapefiles have a number of attributes for inspecting the file contents. -A shapefile is a container for a specific type of geometry, and this can be checked using the -shapeType attribute. +A shapefile is a container for a specific type of geometry, and this can be checked using the +shapeType attribute. + >>> sf = shapefile.Reader("shapefiles/blockgroups.dbf") >>> sf.shapeType 5 Shape types are represented by numbers between 0 and 31 as defined by the -shapefile specification and listed below. It is important to note that numbering system has -several reserved numbers which have not been used yet therefore the numbers of +shapefile specification and listed below. It is important to note that the numbering system has +several reserved numbers that have not been used yet, therefore the numbers of the existing shape types are not sequential: - NULL = 0 @@ -206,7 +373,7 @@ the existing shape types are not sequential: - POLYGONM = 25 - MULTIPOINTM = 28 - MULTIPATCH = 31 - + Based on this we can see that our blockgroups shapefile contains Polygon type shapes. 
The shape types are also defined as constants in the shapefile module, so that we can compare types more intuitively: @@ -220,9 +387,9 @@ For convenience, you can also get the name of the shape type as a string: >>> sf.shapeTypeName == 'POLYGON' True - -Other pieces of meta-data that we can check includes the number of features, -or the bounding box area the shapefile covers: + +Other pieces of meta-data that we can check include the number of features +and the bounding box area the shapefile covers: >>> len(sf) @@ -230,6 +397,14 @@ or the bounding box area the shapefile covers: >>> sf.bbox [-122.515048, 37.652916, -122.327622, 37.863433] +Finally, if you would prefer to work with the entire shapefile in a different +format, you can convert all of it to a GeoJSON dictionary, although you may lose +some information in the process, such as z- and m-values: + + + >>> sf.__geo_interface__['type'] + 'FeatureCollection' + ### Reading Geometry A shapefile's geometry is the collection of points or shapes made from @@ -249,45 +424,55 @@ each shape record. >>> len(shapes) 663 - + To read a single shape by calling its index use the shape() method. The index is the shape's count from 0. So to read the 8th shape record you would use its index which is 7. >>> s = sf.shape(7) + >>> s + Shape #7: POLYGON >>> # Read the bbox of the 8th shape to verify >>> # Round coordinates to 3 decimal places >>> ['%.3f' % coord for coord in s.bbox] ['-122.450', '37.801', '-122.442', '37.808'] -Each shape record (except Points) contain the following attributes. Records of shapeType Point do not have a bounding box 'bbox'. +Each shape record (except Points) contains the following attributes. Records of +shapeType Point do not have a bounding box 'bbox'. >>> for name in dir(shapes[3]): ... if not name.startswith('_'): ... 
name 'bbox' + 'oid' 'parts' 'points' 'shapeType' 'shapeTypeName' - * shapeType: an integer representing the type of shape as defined by the + * `oid`: The shape's index position in the original shapefile. + + + >>> shapes[3].oid + 3 + + * `shapeType`: an integer representing the type of shape as defined by the shapefile specification. >>> shapes[3].shapeType 5 - * shapeTypeName: a string representation of the type of shape as defined by shapeType. Read-only. + * `shapeTypeName`: a string representation of the type of shape as defined by shapeType. Read-only. >>> shapes[3].shapeTypeName 'POLYGON' - - * bbox: If the shape type contains multiple points this tuple describes the + + * `bbox`: If the shape type contains multiple points this tuple describes the lower left (x,y) coordinate and upper right corner coordinate creating a complete box around the points. If the shapeType is a Null (shapeType == 0) then an AttributeError is raised. @@ -299,7 +484,7 @@ Each shape record (except Points) contain the following attributes. Records of s >>> ['%.3f' % coord for coord in bbox] ['-122.486', '37.787', '-122.446', '37.811'] - * parts: Parts simply group collections of points into shapes. If the shape + * `parts`: Parts simply group collections of points into shapes. If the shape record has multiple parts this attribute contains the index of the first point of each part. If there is only one part then a list containing 0 is returned. @@ -308,7 +493,7 @@ Each shape record (except Points) contain the following attributes. Records of s >>> shapes[3].parts [0] - * points: The points attribute contains a list of tuples containing an + * `points`: The points attribute contains a list of tuples containing an (x,y) coordinate for each point in the shape. @@ -320,7 +505,7 @@ Each shape record (except Points) contain the following attributes. 
Records of s >>> ['%.3f' % coord for coord in shape] ['-122.471', '37.787'] -In most cases, however, if you need to more than just type or bounds checking, you may want +In most cases, however, if you need to do more than just type or bounds checking, you may want to convert the geometry to the more human-readable [GeoJSON format](http://geojson.org), where lines and polygons are grouped for you: @@ -329,12 +514,26 @@ where lines and polygons are grouped for you: >>> geoj = s.__geo_interface__ >>> geoj["type"] 'MultiPolygon' - + +The result of the shapes() method similarly supports converting to GeoJSON: + + + >>> shapes.__geo_interface__['type'] + 'GeometryCollection' + +Note: In some cases, if the conversion from shapefile geometry to GeoJSON encounters any problems +or potential issues, a warning message will be displayed with information about the affected +geometry. To ignore or suppress these warnings, you can disable this behavior by setting the +module constant VERBOSE to False: + + + >>> shapefile.VERBOSE = False + ### Reading Records A record in a shapefile contains the attributes for each shape in the -collection of geometry. Records are stored in the dbf file. The link between +collection of geometries. Records are stored in the dbf file. The link between geometry and attributes is the foundation of all geographic information systems. This critical link is implied by the order of shapes and corresponding records in the shp geometry file and the dbf attribute file. @@ -344,12 +543,12 @@ You can call the "fields" attribute of the shapefile as a Python list. Each field is a Python list with the following information: * Field name: the name describing the data at this column index. - * Field type: the type of data at this column index. Types can be: + * Field type: the type of data at this column index. Types can be: * "C": Characters, text. * "N": Numbers, with or without decimals. * "F": Floats (same as "N").
- * "L": Logical, for boolean True/False values. - * "D": Dates. + * "L": Logical, for boolean True/False values. + * "D": Dates. * "M": Memo, has no meaning within a GIS and is part of the xbase spec instead. * Field length: the length of the data found at this column index. Older GIS software may truncate this length to 8 or 11 characters for "Character" @@ -381,6 +580,16 @@ attribute: ... ["UNITS3_9", "N", 8, 0], ["UNITS10_49", "N", 8, 0], ... ["UNITS50_UP", "N", 8, 0], ["MOBILEHOME", "N", 7, 0]] +The first field of a dbf file is always a 1-byte field called "DeletionFlag", +which indicates records that have been deleted but not removed. However, +since this flag is very rarely used, PyShp currently will return all records +regardless of their deletion flag, and the flag is also not included in the list of +record values. In other words, the DeletionFlag field has no real purpose, and +should in most cases be ignored. For instance, to get a list of all fieldnames: + + + >>> fieldnames = [f[0] for f in sf.fields[1:]] + You can get a list of the shapefile's records by calling the records() method: @@ -393,10 +602,10 @@ To read a single record call the record() method with the record's index: >>> rec = sf.record(3) - + Each record is a list-like Record object containing the values corresponding to each field in -the field list. A record's values can be accessed by positional indexing or slicing. -For example in the blockgroups shapefile the 2nd and 3rd fields are the blockgroup id +the field list (except the DeletionFlag). A record's values can be accessed by positional indexing or slicing. 
+For example in the blockgroups shapefile the 2nd and 3rd fields are the blockgroup id and the 1990 population count of that San Francisco blockgroup: @@ -404,7 +613,7 @@ and the 1990 population count of that San Francisco blockgroup: ['060750601001', 4715] For simpler access, the fields of a record can also accessed via the name of the field, -either as a key or as an attribute name. The blockgroup id (BKG_KEY) of the blockgroups shapefile +either as a key or as an attribute name. The blockgroup id (BKG_KEY) of the blockgroups shapefile can also be retrieved as: @@ -413,7 +622,7 @@ can also be retrieved as: >>> rec.BKG_KEY '060750601001' - + The record values can be easily integrated with other programs by converting it to a field-value dictionary: @@ -421,13 +630,13 @@ The record values can be easily integrated with other programs by converting it >>> sorted(dct.items()) [('AGE_18_29', 1467), ('AGE_30_49', 1681), ('AGE_50_64', 92), ('AGE_5_17', 848), ('AGE_65_UP', 30), ('AGE_UNDER5', 597), ('AMERI_ES', 6), ('AREA', 2.34385), ('ASIAN_PI', 452), ('BKG_KEY', '060750601001'), ('BLACK', 1007), ('DIVORCED', 149), ('FEMALES', 2095), ('FHH_CHILD', 16), ('HISPANIC', 416), ('HOUSEHOLDS', 1195), ('HSEHLD_1_F', 40), ('HSEHLD_1_M', 22), ('HSE_UNITS', 1258), ('MALES', 2620), ('MARHH_CHD', 79), ('MARHH_NO_C', 958), ('MARRIED', 2021), ('MEDIANRENT', 739), ('MEDIAN_VAL', 337500), ('MHH_CHILD', 0), ('MOBILEHOME', 0), ('NEVERMARRY', 703), ('OTHER', 288), ('OWNER_OCC', 66), ('POP1990', 4715), ('POP90_SQMI', 2011.6), ('RENTER_OCC', 3733), ('SEPARATED', 49), ('UNITS10_49', 49), ('UNITS2', 160), ('UNITS3_9', 672), ('UNITS50_UP', 0), ('UNITS_1ATT', 302), ('UNITS_1DET', 43), ('VACANT', 93), ('WHITE', 2962), ('WIDOWED', 37)] -If at a later point you need to check the record's index position in the original +If at a later point you need to check the record's index position in the original shapefile, you can do this through the "oid" attribute: >>> rec.oid 3 - + ### Reading Geometry and 
Records Simultaneously You may want to examine both the geometry and the attributes for a record at @@ -449,34 +658,32 @@ Let's read the blockgroup key and the population for the 4th blockgroup: >>> shapeRecs[3].record[1:3] ['060750601001', 4715] -Now let's read the first two points for that same record: - +The result of the shapeRecords() method is a list-like object that can be easily converted +to GeoJSON through the _\_geo_interface\_\_: - >>> points = shapeRecs[3].shape.points[0:2] - >>> len(points) - 2 + >>> shapeRecs.__geo_interface__['type'] + 'FeatureCollection' The shapeRecord() method reads a single shape/record pair at the specified index. To get the 4th shape record from the blockgroups shapefile use the third index: >>> shapeRec = sf.shapeRecord(3) - -The blockgroup key and population count: - - >>> shapeRec.record[1:3] ['060750601001', 4715] - >>> points = shapeRec.shape.points[0:2] +Each individual shape record also supports the _\_geo_interface\_\_ to convert it to a GeoJSON feature: + + + >>> shapeRec.__geo_interface__['type'] + 'Feature' - >>> len(points) - 2 - ## Writing Shapefiles +### The Writer Class + PyShp tries to be as flexible as possible when writing shapefiles while maintaining some degree of automatic validation to make sure you don't accidentally write an invalid file. @@ -491,12 +698,15 @@ interest. Many precision agriculture chemical field sprayers also use the shp format as a control file for the sprayer system (usually in combination with custom database file formats). +#### Writing Shapefiles to Local Files + To create a shapefile you begin by initiating a new Writer instance, passing it the file path and name to save to: >>> w = shapefile.Writer('shapefiles/test/testfile') - + >>> w.field('field1', 'C') + File extensions are optional when reading or writing shapefiles. If you specify them PyShp ignores them anyway. When you save files you can specify a base file name that is used for all three file types.
Or you can specify a name for @@ -504,27 +714,12 @@ one or more file types: >>> w = shapefile.Writer(dbf='shapefiles/test/onlydbf.dbf') - -In that case, any file types not assigned will not -save and only file types with file names will be saved. - -### Writing Shapefiles Using the Context Manager - -The "Writer" class automatically closes the open files and writes the final headers once it is garbage collected. -In case of a crash and to make the code more readable, it is nevertheless recommended -you do this manually by calling the "close()" method: - - - >>> w.close() - -Alternatively, you can also use the "Writer" class as a context manager, to ensure open file -objects are properly closed and final headers written once you exit the with-clause: - + >>> w.field('field1', 'C') - >>> with shapefile.Writer("shapefiles/test/contextwriter") as shp: - ... pass +In that case, any file types not assigned will not +save and only file types with file names will be saved. -### Writing Shapefiles to File-Like Objects +#### Writing Shapefiles to File-Like Objects Just as you can read shapefiles from python file-like objects you can also write to them: @@ -542,22 +737,51 @@ write to them: >>> w.record() >>> w.null() >>> w.close() + >>> # To read back the files you could call the "StringIO.getvalue()" method later. - -### Setting the Shape Type + >>> assert shp.getvalue() + >>> assert shx.getvalue() + >>> assert dbf.getvalue() + + >>> # In fact, you can read directly from them using the Reader + >>> r = shapefile.Reader(shp=shp, shx=shx, dbf=dbf) + >>> len(r) + 1 + + + +#### Writing Shapefiles Using the Context Manager + +The "Writer" class automatically closes the open files and writes the final headers once it is garbage collected. 
+In case of a crash and to make the code more readable, it is nevertheless recommended +you do this manually by calling the "close()" method: + + + >>> w.close() + +Alternatively, you can also use the "Writer" class as a context manager, to ensure open file +objects are properly closed and final headers written once you exit the with-clause: + + + >>> with shapefile.Writer("shapefiles/test/contextwriter") as w: + ... w.field('field1', 'C') + ... pass + +#### Setting the Shape Type The shape type defines the type of geometry contained in the shapefile. All of the shapes must match the shape type setting. -There are three ways to set the shape type: - * Set it when creating the class instance. - * Set it by assigning a value to an existing class instance. +There are three ways to set the shape type: + * Set it when creating the class instance. + * Set it by assigning a value to an existing class instance. * Set it automatically to the type of the first non-null shape by saving the shapefile. To manually set the shape type for a Writer object when creating the Writer: >>> w = shapefile.Writer('shapefiles/test/shapetype', shapeType=3) + >>> w.field('field1', 'C') >>> w.shapeType 3 @@ -569,14 +793,14 @@ OR you can set it after the Writer is created: >>> w.shapeType 1 - + ### Adding Records -Before you can add records you must first create the fields that define what types of -values will go into each attribute. +Before you can add records you must first create the fields that define what types of +values will go into each attribute. -There are several different field types, all of which support storing None values as NULL. +There are several different field types, all of which support storing None values as NULL. 
Text fields are created using the 'C' type, and the third 'size' argument can be customized to the expected length of text values to save space: @@ -589,12 +813,12 @@ length of text values to save space: >>> w.null() >>> w.record('Hello', 'World', 'World'*50) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == ['Hello', 'World', 'World'*50] -Date fields are created using the 'D' type, and can be created using either -date objects, lists, or a YYYYMMDD formatted string. +Date fields are created using the 'D' type, and can be created using either +date objects, lists, or a YYYYMMDD formatted string. Field length or decimal have no impact on this type: @@ -610,18 +834,18 @@ Field length or decimal have no impact on this type: >>> w.record('19980130') >>> w.record(None) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == [date(1898,1,30)] >>> assert r.record(1) == [date(1998,1,30)] >>> assert r.record(2) == [date(1998,1,30)] >>> assert r.record(3) == [None] -Numeric fields are created using the 'N' type (or the 'F' type, which is exactly the same). -By default the fourth decimal argument is set to zero, essentially creating an integer field. -To store floats you must set the decimal argument to the precision of your choice. -To store very large numbers you must increase the field length size to the total number of digits -(including comma and minus). +Numeric fields are created using the 'N' type (or the 'F' type, which is exactly the same). +By default the fourth decimal argument is set to zero, essentially creating an integer field. +To store floats you must set the decimal argument to the precision of your choice. +To store very large numbers you must increase the field length size to the total number of digits +(including comma and minus). 
>>> w = shapefile.Writer('shapefiles/test/dtype') @@ -637,15 +861,15 @@ To store very large numbers you must increase the field length size to the total >>> w.record(INT=nr, LOWPREC=nr, MEDPREC=nr, HIGHPREC=-3.2302e-25, FTYPE=nr, LARGENR=int(nr)*10**100) >>> w.record(None, None, None, None, None, None) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == [1, 1.32, 1.3217328, -3.2302e-25, 1.3217328, 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000] >>> assert r.record(1) == [None, None, None, None, None, None] - -Finally, we can create boolean fields by setting the type to 'L'. -This field can take True or False values, or 1 (True) or 0 (False). -None is interpreted as missing. + +Finally, we can create boolean fields by setting the type to 'L'. +This field can take True or False values, or 1 (True) or 0 (False). +None is interpreted as missing. >>> w = shapefile.Writer('shapefiles/test/dtype') @@ -661,23 +885,23 @@ None is interpreted as missing. >>> w.record(False) >>> w.record(0) >>> w.record(None) - >>> w.record("Nonesense") + >>> w.record("Nonsense") >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> r.record(0) - [True] + Record #0: [True] >>> r.record(1) - [True] + Record #1: [True] >>> r.record(2) - [False] + Record #2: [False] >>> r.record(3) - [False] + Record #3: [False] >>> r.record(4) - [None] + Record #4: [None] >>> r.record(5) - [None] - + Record #5: [None] + You can also add attributes using keyword arguments where the keys are field names. @@ -694,12 +918,12 @@ You can also add attributes using keyword arguments where the keys are field nam Geometry is added using one of several convenience methods. The "null" method is used for null shapes, "point" is used for point shapes, "multipoint" is used for multipoint shapes, "line" for lines, -"poly" for polygons. +"poly" for polygons. 
**Adding a Null shape** -A shapefile may contain some records for which geometry is not available, and may be set using the "null" method. -Because Null shape types (shape type 0) have no geometry the "null" method is called without any arguments. +A shapefile may contain some records for which geometry is not available, and may be set using the "null" method. +Because Null shape types (shape type 0) have no geometry the "null" method is called without any arguments. >>> w = shapefile.Writer('shapefiles/test/null') @@ -713,76 +937,77 @@ Because Null shape types (shape type 0) have no geometry the "null" method is ca **Adding a Point shape** Point shapes are added using the "point" method. A point is specified by an x and -y value. +y value. >>> w = shapefile.Writer('shapefiles/test/point') >>> w.field('name', 'C') - - >>> w.point(122, 37) + + >>> w.point(122, 37) >>> w.record('point1') - + >>> w.close() **Adding a MultiPoint shape** -If your point data allows for the possibility of multiple points per feature, use "multipoint" instead. -These are specified as a list of xy point coordinates. +If your point data allows for the possibility of multiple points per feature, use "multipoint" instead. +These are specified as a list of xy point coordinates. >>> w = shapefile.Writer('shapefiles/test/multipoint') >>> w.field('name', 'C') - - >>> w.multipoint([[122,37], [124,32]]) + + >>> w.multipoint([[122,37], [124,32]]) >>> w.record('multipoint1') - + >>> w.close() - + **Adding a LineString shape** -For LineString shapefiles, each line shape consists of multiple lines. Line shapes must be given as a list of lines, -even if there is just one line. Also, each line must have at least two points. - - +For LineString shapefiles, each shape is given as a list of one or more linear features. +Each of the linear features must have at least two points. + + >>> w = shapefile.Writer('shapefiles/test/line') >>> w.field('name', 'C') - + >>> w.line([ ... 
[[1,5],[5,5],[5,1],[3,3],[1,1]], # line 1 ... [[3,2],[2,6]] # line 2 ... ]) - + >>> w.record('linestring1') - + >>> w.close() - + **Adding a Polygon shape** Similarly to LineString, Polygon shapes consist of multiple polygons, and must be given as a list of polygons. -The main difference being that polygons must have at least 4 points and the last point must be the same as the first. -It's also okay if you forget to do so, PyShp automatically checks and closes the polygons if you don't. +The main difference is that polygons must have at least 4 points and the last point must be the same as the first. +It's also okay if you forget to repeat the first point at the end; PyShp automatically checks and closes the polygons +if you don't. It's important to note that for Polygon shapefiles, your polygon coordinates must be ordered in a clockwise direction. If any of the polygons have holes, then the hole polygon coordinates must be ordered in a counterclockwise direction. -The direction of your polygons determines how shapefile readers will distinguish between polygons outlines and holes. +The direction of your polygons determines how shapefile readers will distinguish between polygon outlines and holes. >>> w = shapefile.Writer('shapefiles/test/polygon') >>> w.field('name', 'C') >>> w.poly([ - ... [[122,37], [117,36], [115,32], [118,20], [113,24]], # poly 1 - ... [[15,2], [17,6], [22,7]], # hole 1 - ... [[122,37], [117,36], [115,32]] # poly 2 + ... [[113,24], [112,32], [117,36], [122,37], [118,20]], # poly 1 + ... [[116,29],[116,26],[119,29],[119,32]], # hole 1 + ... [[15,2], [17,6], [22,7]] # poly 2 ... ]) >>> w.record('polygon1') - + >>> w.close() - + **Adding from an existing Shape object** Finally, geometry can be added by passing an existing "Shape" object to the "shape" method. -You can also pass it any GeoJSON dictionary or _\_geo_interface\_\_ compatible object. +You can also pass it any GeoJSON dictionary or _\_geo_interface\_\_ compatible object. 
This can be particularly useful for copying from one file to another: @@ -795,14 +1020,14 @@ This can be particularly useful for copying from one file to another: >>> for shaperec in r.iterShapeRecords(): ... w.record(*shaperec.record) ... w.shape(shaperec.shape) - + >>> # or GeoJSON dicts >>> for shaperec in r.iterShapeRecords(): ... w.record(*shaperec.record) ... w.shape(shaperec.shape.__geo_interface__) - - >>> w.close() - + + >>> w.close() + ### Geometry and Record Balancing @@ -811,18 +1036,18 @@ number of records equals the number of shapes to create a valid shapefile. You must take care to add records and shapes in the same order so that the record data lines up with the geometry data. For example: - + >>> w = shapefile.Writer('shapefiles/test/balancing', shapeType=shapefile.POINT) >>> w.field("field1", "C") >>> w.field("field2", "C") - + >>> w.record("row", "one") >>> w.point(1, 1) - + >>> w.record("row", "two") >>> w.point(2, 2) - -To help prevent accidental misalignment pyshp has an "auto balance" feature to + +To help prevent accidental misalignment PyShp has an "auto balance" feature to make sure when you add either a shape or a record the two sides of the equation line up. This way if you forget to update an entry the shapefile will still be valid and handled correctly by most shapefile @@ -834,7 +1059,7 @@ the attribute autoBalance to 1 or True: >>> w.record("row", "three") >>> w.record("row", "four") >>> w.point(4, 4) - + >>> w.recNum == w.shpNum True @@ -843,7 +1068,7 @@ to ensure the other side is up to date. When balancing is used null shapes are created on the geometry side or records with a value of "NULL" for each field is created on the attribute side. This gives you flexibility in how you build the shapefile. -You can create all of the shapes and then create all of the records or vice versa. +You can create all of the shapes and then create all of the records or vice versa. 
>>> w.autoBalance = 0 @@ -853,238 +1078,425 @@ You can create all of the shapes and then create all of the records or vice vers >>> w.point(5, 5) >>> w.point(6, 6) >>> w.balance() - + >>> w.recNum == w.shpNum True -If you do not use the autobalance or balance method and forget to manually +If you do not use the autoBalance attribute or the balance() method and forget to manually balance the geometry and attributes the shapefile will be viewed as corrupt by most shapefile software. - +### Writing .prj files +A .prj file, or projection file, is a simple text file that stores a shapefile's map projection and coordinate reference system to help mapping software properly locate the geometry on a map. If you don't have one, you may get confusing errors when you try to use the shapefile you created. The GIS software may complain that it doesn't know the shapefile's projection and refuse to accept it, it may assume the shapefile is the same projection as the rest of your GIS project and put it in the wrong place, or it might assume the coordinates are an offset in meters from latitude and longitude 0,0 which will put your data in the middle of the ocean near Africa. The text in the .prj file is a [Well-Known-Text (WKT) projection string](https://en.wikipedia.org/wiki/Well-known_text_representation_of_coordinate_reference_systems). Projection strings can be quite long so they are often referenced using numeric codes called EPSG codes. The .prj file must have the same base name as your shapefile. So for example if you have a shapefile named "myPoints.shp", the .prj file must be named "myPoints.prj".
+ +If you're using the same projection over and over, the following is a simple way to create the .prj file assuming your base filename is stored in a variable called "filename": + +``` + with open("{}.prj".format(filename), "w") as prj: + wkt = 'GEOGCS["WGS 84",' + wkt += 'DATUM["WGS_1984",' + wkt += 'SPHEROID["WGS 84",6378137,298.257223563]]' + wkt += ',PRIMEM["Greenwich",0],' + wkt += 'UNIT["degree",0.0174532925199433]]' + prj.write(wkt) +``` + +If you need to dynamically fetch WKT projection strings, you can use the pure Python [PyCRS](https://github.com/karimbahgat/PyCRS) module which has a number of useful features. + +# Advanced Use + +## Common Errors and Fixes + +Below we list some commonly encountered errors and ways to fix them. + +### Warnings and Logging + +By default, PyShp chooses to be transparent and provide the user with logging information and warnings about non-critical issues when reading or writing shapefiles. This behavior is controlled by the module constant `VERBOSE` (which defaults to True). If you would rather suppress this information, you can simply set this to False: + + + >>> shapefile.VERBOSE = False + +All logging happens under the namespace `shapefile`. So another way to suppress all PyShp warnings would be to alter the logging behavior for that namespace: + + + >>> import logging + >>> logging.getLogger('shapefile').setLevel(logging.ERROR) + +### Shapefile Encoding Errors + +PyShp supports reading and writing shapefiles in any language or character encoding, and provides several options for decoding and encoding text. +Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't have to specify the encoding. +If you encounter an encoding error when reading a shapefile, this means the shapefile was likely written in a non-utf8 encoding. +For instance, when working with English language shapefiles, a common reason for encoding errors is that the shapefile was written in Latin-1 encoding. 
+For reading shapefiles in any non-utf8 encoding, such as Latin-1, just +supply the encoding option when creating the Reader class. + + + >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="latin1") + >>> r.record(0) == [2, u'Ă‘andĂş'] + True + +Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such +as UTF-8. Assuming the new encoding supports the characters you are trying to write, reading it back in +should give you the same unicode string you started with. + + + >>> w = shapefile.Writer("shapefiles/test/latin_as_utf8.shp", encoding="utf8") + >>> w.fields = r.fields[1:] + >>> w.record(*r.record(0)) + >>> w.null() + >>> w.close() + + >>> r = shapefile.Reader("shapefiles/test/latin_as_utf8.shp", encoding="utf8") + >>> r.record(0) == [2, u'Ă‘andĂş'] + True + +If you supply the wrong encoding and the string is unable to be decoded, PyShp will by default raise an +exception. If however, on rare occasion, you are unable to find the correct encoding and want to ignore +or replace encoding errors, you can specify the "encodingErrors" to be used by the decode method. This +applies to both reading and writing. + + + >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="ascii", encodingErrors="replace") + >>> r.record(0) == [2, u'ďż˝andďż˝'] + True + + + +## Reading Large Shapefiles + +Despite being a lightweight library, PyShp is designed to be able to read shapefiles of any size, allowing you to work with hundreds of thousands or even millions +of records and complex geometries. 
+ +### Iterating through a shapefile + +As an example, let's load this Natural Earth shapefile of more than 4000 global administrative boundary polygons: + + + >>> sf = shapefile.Reader("https://github.com/nvkelso/natural-earth-vector/blob/master/10m_cultural/ne_10m_admin_1_states_provinces?raw=true") + +When first creating the Reader class, the library only reads the header information +and leaves the rest of the file contents alone. Once you call the records() and shapes() +methods however, it will attempt to read the entire file into memory at once. +For very large files this can result in a MemoryError. So when working with large files +it is recommended to use the iterShapes(), iterRecords(), or iterShapeRecords() +methods instead. These iterate through the file contents one at a time, enabling you to loop +through them while keeping memory usage at a minimum. + + + >>> for shape in sf.iterShapes(): + ... # do something here + ... pass + + >>> for rec in sf.iterRecords(): + ... # do something here + ... pass + + >>> for shapeRec in sf.iterShapeRecords(): + ... # do something here + ... pass + + >>> for shapeRec in sf: # same as iterShapeRecords() + ... # do something here + ... pass + +### Limiting which fields to read + +By default when reading the attribute records of a shapefile, PyShp unpacks and returns the data for all of the dbf fields, regardless of whether you actually need that data or not. To limit which field data is unpacked when reading each record and speed up processing time, you can specify the `fields` argument to any of the methods involving record data. Note that the order of the specified fields does not matter; the resulting records will list the specified field values in the order that they appear in the original dbf file.
For instance, if we are only interested in the country and name of each admin unit, the following is a more efficient way of iterating through the file: + + + >>> fields = ["geonunit", "name"] + >>> for rec in sf.iterRecords(fields=fields): + ... # do something + ... pass + >>> rec + Record #4595: ['Birgu', 'Malta'] + +### Attribute filtering + +In many cases, we aren't interested in all entries of a shapefile, but rather only want to retrieve a small subset of records by filtering on some attribute. To avoid wasting time reading records and shapes that we don't need, we can start by iterating only the records and fields of interest, check if the record matches some condition as a way to filter the data, and finally load the full record and shape geometry for those that meet the condition: + + + >>> filter_field = "geonunit" + >>> filter_value = "Eritrea" + >>> for rec in sf.iterRecords(fields=[filter_field]): + ... if rec[filter_field] == filter_value: + ... # load full record and shape + ... shapeRec = sf.shapeRecord(rec.oid) + ... shapeRec.record["name"] + 'Debubawi Keyih Bahri' + 'Debub' + 'Semenawi Keyih Bahri' + 'Gash Barka' + 'Maekel' + 'Anseba' + +Selectively reading only the necessary data in this way is particularly useful for efficiently processing a limited subset of data from very large files or when looping through a large number of files, especially if they contain large attribute tables or complex shape geometries. + +### Spatial filtering + +Another common use-case is that we only want to read those records that are located in some region of interest. Because the shapefile stores the bounding box of each shape separately from the geometry data, it's possible to quickly retrieve all shapes that might overlap a given bounding box region without having to load the full shape geometry data for every shape. 
This can be done by specifying the `bbox` argument to the shapes, iterShapes, or iterShapeRecords methods: + + + >>> bbox = [36.423, 12.360, 43.123, 18.004] # ca bbox of Eritrea + >>> fields = ["geonunit","name"] + >>> for shapeRec in sf.iterShapeRecords(bbox=bbox, fields=fields): + ... shapeRec.record + Record #368: ['Afar', 'Ethiopia'] + Record #369: ['Tadjourah', 'Djibouti'] + Record #375: ['Obock', 'Djibouti'] + Record #376: ['Debubawi Keyih Bahri', 'Eritrea'] + Record #1106: ['Amhara', 'Ethiopia'] + Record #1107: ['Gedarif', 'Sudan'] + Record #1108: ['Tigray', 'Ethiopia'] + Record #1414: ['Sa`dah', 'Yemen'] + Record #1415: ['`Asir', 'Saudi Arabia'] + Record #1416: ['Hajjah', 'Yemen'] + Record #1417: ['Jizan', 'Saudi Arabia'] + Record #1598: ['Debub', 'Eritrea'] + Record #1599: ['Red Sea', 'Sudan'] + Record #1600: ['Semenawi Keyih Bahri', 'Eritrea'] + Record #1601: ['Gash Barka', 'Eritrea'] + Record #1602: ['Kassala', 'Sudan'] + Record #1603: ['Maekel', 'Eritrea'] + Record #2037: ['Al Hudaydah', 'Yemen'] + Record #3741: ['Anseba', 'Eritrea'] + +This functionality means that shapefiles can be used as a bare-bones spatially indexed database, with very fast bounding box queries for even the largest of shapefiles. Note that, as with all spatial indexing, this method does not guarantee that the *geometries* of the resulting matches overlap the queried region, only that their *bounding boxes* overlap. + + + +## Writing large shapefiles + +Similar to the Reader class, the shapefile Writer class uses a streaming approach to keep memory +usage at a minimum and allow writing shapefiles of arbitrarily large sizes. The library takes care of this under-the-hood by immediately +writing each geometry and record to disk the moment they +are added using shape() or record(). Once the writer is closed, exited, or garbage +collected, the final header information is calculated and written to the beginning of +the file. 
+ +### Merging multiple shapefiles + +This means that it's possible to merge hundreds or thousands of shapefiles, as +long as you iterate through the source files to avoid loading everything into +memory. The following example copies the contents of a shapefile to a new file 10 times: + + >>> # create writer + >>> w = shapefile.Writer('shapefiles/test/merge') + + >>> # copy over fields from the reader + >>> r = shapefile.Reader("shapefiles/blockgroups") + >>> for field in r.fields[1:]: + ... w.field(*field) + + >>> # copy the shapefile to writer 10 times + >>> repeat = 10 + >>> for i in range(repeat): + ... r = shapefile.Reader("shapefiles/blockgroups") + ... for shapeRec in r.iterShapeRecords(): + ... w.record(*shapeRec.record) + ... w.shape(shapeRec.shape) + + >>> # check that the written file is 10 times longer + >>> len(w) == len(r) * 10 + True + + >>> # close the writer + >>> w.close() + +In this trivial example, we knew that all files had the exact same field names, ordering, and types. In other scenarios, you will have to additionally make sure that all shapefiles have the exact same fields in the same order, and that they all contain the same geometry type. -# How To's +### Editing shapefiles + +If you need to edit a shapefile you would have to read the +file one record at a time, modify or filter the contents, and write it back out. For instance, to create a copy of a shapefile that only keeps a subset of relevant fields: + + >>> # create writer + >>> w = shapefile.Writer('shapefiles/test/edit') + + >>> # define which fields to keep + >>> keep_fields = ['BKG_KEY', 'MEDIANRENT'] + + >>> # copy over the relevant fields from the reader + >>> r = shapefile.Reader("shapefiles/blockgroups") + >>> for field in r.fields[1:]: + ... if field[0] in keep_fields: + ... w.field(*field) + + >>> # write only the relevant attribute values + >>> for shapeRec in r.iterShapeRecords(fields=keep_fields): + ... w.record(*shapeRec.record) + ... 
w.shape(shapeRec.shape) + + >>> # close writer + >>> w.close() ## 3D and Other Geometry Types -Most shapefiles store conventional 2D points, lines, or polygons. But the shapefile format is also capable of storing -various other types of geometries as well, including complex 3D surfaces and objects. +Most shapefiles store conventional 2D points, lines, or polygons. But the shapefile format is also capable +of storing various other types of geometries, including complex 3D surfaces and objects. + +### Shapefiles with measurement (M) values + +Measured shape types are shapes that include a measurement value at each vertex, for instance +speed measurements from a GPS device. Shapes with measurement (M) values are added with the following +methods: "pointm", "multipointm", "linem", and "polym". The M-values are specified by adding a +third M value to each XY coordinate. Missing or unobserved M-values are specified with a None value, +or by simply omitting the third M-coordinate. -**Shapefiles with measurement (M) values** -Measured shape types are shapes that include a measurement value at each vertex, for instance speed measurements from a GPS device. -Shapes with measurement (M) values are added with following methods: "pointm", "multipointm", "linem", and "polygonm". -The M-values are specified by adding a third M value to each XY coordinate. Missing or unobserved M-values are specified with a None value, -or by simply omitting the third M-coordinate. - - >>> w = shapefile.Writer('shapefiles/test/linem') >>> w.field('name', 'C') - + >>> w.linem([ ... [[1,5,0],[5,5],[5,1,3],[3,3,None],[1,1,0]], # line with one omitted and one missing M-value ... [[3,2],[2,6]] # line without any M-values ... 
]) - + >>> w.record('linem1') - + >>> w.close() - + Shapefiles containing M-values can be examined in several ways: >>> r = shapefile.Reader('shapefiles/test/linem') - - >>> r.mbox # the lower and upper bound of M values in the shapefile + + >>> r.mbox # the lower and upper bound of M-values in the shapefile [0.0, 3.0] - - >>> r.shape(0).m # flat list of M values + + >>> r.shape(0).m # flat list of M-values [0.0, None, 3.0, None, 0.0, None, None] - -**Shapefiles with elevation (Z) values** -Elevation shape types are shapes that include an elevation value at each vertex, for instance elevation from a GPS device. -Shapes with an elevation (Z) values are added with following methods: "pointz", "multipointz", "linez", and "polygonz". +### Shapefiles with elevation (Z) values + +Elevation shape types are shapes that include an elevation value at each vertex, for instance elevation from a GPS device. +Shapes with elevation (Z) values are added with the following methods: "pointz", "multipointz", "linez", and "polyz". The Z-values are specified by adding a third Z value to each XY coordinate. Z-values do not support the concept of missing data, but if you omit the third Z-coordinate it will default to 0. Note that Z-type shapes also support measurement (M) values added -as a fourth M-coordinate. This too is optional. - - +as a fourth M-coordinate. This too is optional. + + >>> w = shapefile.Writer('shapefiles/test/linez') >>> w.field('name', 'C') - + >>> w.linez([ ... [[1,5,18],[5,5,20],[5,1,22],[3,3],[1,1]], # line with some omitted Z-values ... [[3,2],[2,6]], # line without any Z-values - ... [[3,2,15,0],[2,6,13,3],[1,9,14,2]] # line with both Z and M-values + ... [[3,2,15,0],[2,6,13,3],[1,9,14,2]] # line with both Z- and M-values ... 
]) - + >>> w.record('linez1') - + >>> w.close() - + To examine a Z-type shapefile you can do: >>> r = shapefile.Reader('shapefiles/test/linez') - - >>> r.zbox # the lower and upper bound of Z values in the shapefile + + >>> r.zbox # the lower and upper bound of Z-values in the shapefile [0.0, 22.0] - - >>> r.shape(0).z # flat list of Z values + + >>> r.shape(0).z # flat list of Z-values [18.0, 20.0, 22.0, 0.0, 0.0, 0.0, 0.0, 15.0, 13.0, 14.0] -**3D MultiPatch Shapefiles** +### 3D MultiPatch Shapefiles -Multipatch shapes are useful for storing composite 3-Dimensional objects. +Multipatch shapes are useful for storing composite 3-Dimensional objects. A MultiPatch shape represents a 3D object made up of one or more surface parts. -Each surface in "parts" is defined by a list of XYZM values (Z and M values optional), and its corresponding type -given in the "partTypes" argument. The part type decides how the coordinate sequence is to be interpreted, and can be one +Each surface in "parts" is defined by a list of XYZM values (Z and M values optional), and its corresponding type is +given in the "partTypes" argument. The part type decides how the coordinate sequence is to be interpreted, and can be one of the following module constants: TRIANGLE_STRIP, TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. -For instance, a TRIANGLE_STRIP may be used to represent the walls of a building, combined with a TRIANGLE_FAN to represent -its roof: +For instance, a TRIANGLE_STRIP may be used to represent the walls of a building, combined with a TRIANGLE_FAN to represent +its roof: >>> from shapefile import TRIANGLE_STRIP, TRIANGLE_FAN - + >>> w = shapefile.Writer('shapefiles/test/multipatch') >>> w.field('name', 'C') - + >>> w.multipatch([ ... [[0,0,0],[0,0,3],[5,0,0],[5,0,3],[5,5,0],[5,5,3],[0,5,0],[0,5,3],[0,0,0],[0,0,3]], # TRIANGLE_STRIP for house walls ... [[2.5,2.5,5],[0,0,3],[5,0,3],[5,5,3],[0,5,3],[0,0,3]], # TRIANGLE_FAN for pointed house roof ... ], ... 
partTypes=[TRIANGLE_STRIP, TRIANGLE_FAN]) # one type for each part - - >>> w.record('house1') - - >>> w.close() - -For an introduction to the various multipatch part types and examples of how to create 3D MultiPatch objects see [this -ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). - -## Working with Large Shapefiles - -Despite being a lightweight library, PyShp is designed to be able to read and write -shapefiles of any size, allowing you to work with hundreds of thousands or even millions -of records and complex geometries. - -When first creating the Reader class, the library only reads the header information -and leaves the rest of the file contents alone. Once you call the records() and shapes() -methods however, it will attempt to read the entire file into memory at once. -For very large files this can result in MemoryError. So when working with large files -it is recommended to use instead the iterShapes(), iterRecords(), or iterShapeRecords() -methods instead. These iterate through the file contents one at a time, enabling you to loop -through them while keeping memory usage at a minimum. - - - >>> for shape in sf.iterShapes(): - ... # do something here - ... pass - - >>> for rec in sf.iterRecords(): - ... # do something here - ... pass - - >>> for shapeRec in sf.iterShapeRecords(): - ... # do something here - ... pass - - >>> for shapeRec in sf: # same as iterShapeRecords() - ... # do something here - ... pass - -The shapefile Writer class uses a similar streaming approach to keep memory -usage at a minimum. The library takes care of this under-the-hood by immediately -writing each geometry and record to disk the moment they -are added using shape() or record(). Once the writer is closed, exited, or garbage -collected, the final header information is calculated and written to the beginning of -the file. 
- -This means that as long as you are able to iterate through a source file without having -to load everything into memory, such as a large CSV table or a large shapefile, you can -process and write any number of items, and even merging many different source files into a single -large shapefile. If you need to edit or undo any of your writing you would have to read the -file back in, one record at a time, make your changes, and write it back out. - -## Unicode and Shapefile Encodings - -PyShp has full support for unicode and shapefile encodings, so you can always expect to be working -with unicode strings in shapefiles that have text fields. -Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't -have to specify the encoding. For reading shapefiles in any other encoding, such as Latin-1, just -supply the encoding option when creating the Reader class. - - - >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="latin1") - >>> r.record(0) == [2, u'Ñandú'] - True - -Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such -as UTF-8. Provided the new encoding supports the characters you are trying to write, reading it back in -should give you the same unicode string you started with. + >>> w.record('house1') - >>> w = shapefile.Writer("shapefiles/test/latin_as_utf8.shp", encoding="utf8") - >>> w.fields = r.fields[1:] - >>> w.record(*r.record(0)) - >>> w.null() >>> w.close() - - >>> r = shapefile.Reader("shapefiles/test/latin_as_utf8.shp", encoding="utf8") - >>> r.record(0) == [2, u'Ñandú'] - True - -If you supply the wrong encoding and the string is unable to be decoded, PyShp will by default raise an -exception. If however, on rare occasion, you are unable to find the correct encoding and want to ignore -or replace encoding errors, you can specify the "encodingErrors" to be used by the decode method. This -applies to both reading and writing. 
+ +For an introduction to the various multipatch part types and examples of how to create 3D MultiPatch objects see [this +ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). - >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="ascii", encodingErrors="replace") - >>> r.record(0) == [2, u'ďż˝andďż˝'] - True - # Testing -The testing framework is doctest, which are located in this file README.md. -In the same folder as README.md and shapefile.py, from the command line run +The testing framework is pytest, and the tests are located in test_shapefile.py. +This includes an extensive set of unit tests of the various pyshp features, +and tests against various input data. Some of the tests that require +internet connectivity will be skipped in offline testing environments. +In the same folder as README.md and shapefile.py, from the command line run +``` +$ python -m pytest +``` + +Additionally, all the code and examples located in this file, README.md, +is tested and verified with the builtin doctest framework. +A special routine for invoking the doctest is run when calling directly on shapefile.py. +In the same folder as README.md and shapefile.py, from the command line run ``` $ python shapefile.py -``` +``` + +Linux/Mac and similar platforms may need to run `$ dos2unix README.md` in order +to correct line endings in README.md, if Git has not automatically changed them. -Linux/Mac and similar platforms will need to run `$ dos2unix README.md` in order -correct line endings in README.md. # Contributors ``` Atle Frenvik Sveen Bas Couwenberg +Ben Beasley Casey Meisenzahl Charles Arnold David A. 
Riggs davidh-ssec Evan Heidtmann +ezcitron +fiveham geospatialpython Hannes Ignacio Martinez Vazquez +James Parrott Jason Moujaes +Jonty Wareing Karim Bahgat +karanrn +Kurt Schwehr Kyle Kelley +Lionel Guez Louis Tiao Marcin Cuprjak +mcuprjak Micah Cochran Michael Davis Michal Čihař Mike Toews +Miroslav Šedivý Nilo +pakoun Paulo Ernesto Raynor Vliegendhart Razzi Abuissa +RosBer97 Ross Rogers Ryan Brideau +Tim Gates Tobias Megies Tommi Penttinen Uli Köhler +Vsevolod Novikov Zac Miller ``` diff --git a/changelog.txt b/changelog.txt index ae1a02a..533d704 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,173 +1,271 @@ - -VERSION 2.0.1 - -2018-11-05 - * Fix pip install setup.py README decoding error. - -VERSION 2.0.0 - -2018-09-01 - (Note: Some contributor attributions may be missing.) - New Features: - * Full support for unicode text, with custom encoding, and exception handling. - - Means that the Reader returns unicode, and the Writer accepts unicode. - * PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. - * Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: - - Specify filepath/destination and text encoding when creating the Writer. - - The file is written incrementally with each call to shape/record. - - Adding shapes is now done using dedicated methods for each shapetype. - * Reading shapefiles is now more convenient: - - Shapefiles can be opened using the context manager, and files are properly closed. - - Shapefiles can be iterated, have a length, and supports the geo interface. - - New ways of inspecing shapefile metadata by printing. [@megies] - - More convenient accessing of Record values as attributes. [@philippkraft] - - More convenient shape type name checking. [@megies] - * Add more support and documentation for MultiPatch 3D shapes. 
- * The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. - * Better documentation of previously unclear aspects, such as field types. - - Bug Fixes: - * More reliable/robust: - - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] - - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. [@ShinNoNoir] - - Improved parsing of field value types, fixed errors and made more flexible. - - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] - * Fix some geo interface errors, including checking polygon directions. - * Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] - * Enforce maximum field limit. [@mwtoews] - -VERSION 1.2.12 - * ? - -VERSION 1.2.11 - -2017-04-29 Karim Bahgat - * Fixed bugs when reading and writing empty shapefiles. - * Fixed bug when writing null geometry. - * Fixed misc data type errors. - * Fixed error when reading files with wrong record length. - * Use max field precision when saving decimal numbers. - * Improved shapetype detection. - * Expanded docs on data types. - * General doc additions and travis icon. - -VERSION 1.2.10 - -2016-09-24 Karim Bahgat - * Bump version to fix pip install issue. - -VERSION 1.2.9 - -2016-09-22 Karim Bahgat - * Revert back to fix #66. - -VERSION 1.2.8 - -2016-08-17 Joel Lawhead - * Configured Travis-CI - -VERSION 1.2.5 - -2016-08-16 Joel Lawhead - * Reader speed up through batch unpacking bytes - * Merge README text into markdown file. Remove text version. 
- * Fixed parsing of number of points for some shapes (MULTIPOINTM, MULTIPOINTZ) - -VERSON 1.2.3 - -2015-06-21 Joel Lawhead - *shapefile.py (u) Bugfix for Python3 with Reader.iterShapeRecords() - -VERSION 1.2.2 - -### upcoming (2015/01/09 05:27 +00:00) -- [#11](https://github.com/geospatialpython/pyshp/pull/11) Merge pull request #11 from 7mp/master (@7mp) -- [#1](https://github.com/geospatialpython/pyshp/pull/1) Merge pull request #1 from rgbkrk/patch-1 (@rgbkrk) -- [#13](https://github.com/geospatialpython/pyshp/pull/13) Merge pull request #13 from jzmiller1/patch-1 (@jzmiller1) -- [#16](https://github.com/geospatialpython/pyshp/pull/16) Merge pull request #16 from riggsd/null-friendly (@riggsd) -- [#17](https://github.com/geospatialpython/pyshp/pull/17) Merge pull request #17 from riggsd/no-asserts (@riggsd) -- [#19](https://github.com/geospatialpython/pyshp/pull/19) Merge pull request #19 from razzius/master (@razzius) -- [#20](https://github.com/geospatialpython/pyshp/pull/20) Merge pull request #20 from Brideau/patch-1 (@Brideau) -- [12d69d4](https://github.com/GeospatialPython/pyshp/commit/12d69d47d8c90b445ea22bf5d9530b0c1c710de5) Updated to version 1.2.1 to match PyPI (@GeospatialPython) -- [05b69dc](https://github.com/GeospatialPython/pyshp/commit/05b69dc6b3d58c0dc9a822f6c4b8d45cf8dc9d94) Updated to version 1.2.1 to match PyPI (@GeospatialPython) -- [d2e9f1a](https://github.com/GeospatialPython/pyshp/commit/d2e9f1a41d02cf932484111f45c31781d1f7385a) Typo: recordsIter should be iterRecords (@Brideau) -- [a965aff](https://github.com/GeospatialPython/pyshp/commit/a965aff230aa3f3b85016f7b627609c7e53a2cf9) Format README code sample (@razzius) -- [66e1802](https://github.com/GeospatialPython/pyshp/commit/66e1802013fd3535baa505e15625afaa895ef819) Raise ShapefileException for runtime errors rather than using `assert()` -- [d72723c](https://github.com/GeospatialPython/pyshp/commit/d72723c9e38db8e859b79d95a65c00af1c2ba8ba) Return None when parsing (illegal) NULL 
attribute values in numeric and date fields, like those produced by QGIS -- [783e68c](https://github.com/GeospatialPython/pyshp/commit/783e68c75b8f20c7656ea470dbc5e9496a8ee0ac) Update link to "XBase File Format Description" (@jzmiller1) -- [79cc409](https://github.com/GeospatialPython/pyshp/commit/79cc409362a24caf4a21923419490ee95d557dc3) Added `Reader.iterShapeRecords` to help work with larger files -- [18c5521](https://github.com/GeospatialPython/pyshp/commit/18c5521b89cd1d7968dff8eb03c1ec37ab4307c5) URL Change (@rgbkrk) -- [202143c](https://github.com/GeospatialPython/pyshp/commit/202143c823407ffea07b5400e77b9ded7169f696) README.md TOC Take 2 -- [2cca75c](https://github.com/GeospatialPython/pyshp/commit/2cca75cd09b27bb19a77ffeb68afc535e3c33802) README.md TOC -- [8b5e994](https://github.com/GeospatialPython/pyshp/commit/8b5e994905fd4a70c0f7ce6d814346e6666b280c) README.md -- [f31a3d7](https://github.com/GeospatialPython/pyshp/commit/f31a3d773dd22e65d3e38ad8b034f186a05b4c4d) Update README.txt (@GeospatialPython) - -VERSION 1.2.1 - -2014-05-11 Joel Lawhead - *shapefile.py (u) fixed bug which failed to properly read some dbf fields in Python 3 - -VERSION 1.2.0 - -2013-09-05 Joel Lawhead - *README.txt add example/test for writing a 3D polygon - -VERSION 1.1.9 - -2013-07-27 Joel Lawhead - *shapefile.py (Writer.__shpRecords) fixed inconsistency between Reader and Writer - when referencing "z" and "m" values. This bug caused errors only when editing - 3D shapefiles. - -VERSION 1.1.8 - -2013-07-02 Joel Lawhead - *shapefile.py (Writer.poly()) fixed a bug that resulted in incorrect part indexes - *README.txt updated several errors in the documentation. - -2013-06-25 Joel Lawhead - *shapefile.py (Reader.shapes(),Reader.iterShapes()) Updated to verify the file length by - seeking to the end. A user reported shapefiles in the wild which had incorrect .shp file - lengths reported in the header which crashed when reading or iterating shapes. 
Most - insist on using the .shx file but there's no real reason to do so. - -VERSION 1.1.7 - -2013-06-22 Joel Lawhead - - *shapefile.py (_Shape.__geo_interface__) Added Python __geo_interface__ convention - to export shapefiles as GeoJSON. - - *shapefile.py (Reader.__init__) Used is_string() method to detect filenames passed - as unicode strings. - - *shapefile.py (Reader.iterShapes) Added iterShapes() method to iterate through - geometry records for parsing large files efficiently. - - *shapefile.py (Reader.iterRecords) Added iterRecords() method to iterate through - dbf records efficiently in large files. - - *shapefile.py (Reader.shape) Modified shape() method to use iterShapes() if shx - file is not available. - - *shapefile.py (main) Added __version__ attribute. - - *shapefile.py (Writer.record) Fixed bug which prevents writing the number 0 to - dbf fields. - - *shapefile.py (Reader.__shape) Updated to calculate and seek the start of the next record. The - shapefile spec does not require the content of a geometry record to be as long as the content - length defined in the header. The result is you can delete features without modifying the - record header allowing for empty space in records. - - *shapefile.py (Writer.poly) Added enforcement of closed polygons - - *shapefile.py (Writer.save) Added unique file name generator to use if no file names are passed - to a writer instance when saving (ex. w.save()). The unique file name is returned as a string. - - *README.txt (main) Added tests for iterShapes(), iterRecords(), __geo_interface__() - - *README.txt (main) Updated "bbox" property documentation to match Esri specification. - - - + +VERSION 2.3.1 + +2022-07-28 + Bug fixes: + * Fix recently introduced issue where Reader/Writer closes file-like objects provided by user (#244) + +VERSION 2.3.0 + +2022-04-30 + New Features: + * Added support for pathlib and path-like shapefile filepaths (@mwtoews). + * Allow reading individual file extensions via filepaths. 
+ + Improvements: + * Simplified setup and deployment (@mwtoews) + * Faster shape access when missing shx file + * Switch to named logger (see #240) + + Bug fixes: + * More robust handling of corrupt shapefiles (fixes #235) + * Fix errors when writing to individual file-handles (fixes #237) + * Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) + * Fix test issues in environments without network access (@sebastic, @musicinmybrain). + +VERSION 2.2.0 + +2022-02-02 + New Features: + * Read shapefiles directly from zipfiles. + * Read shapefiles directly from urls. + * Allow fast extraction of only a subset of dbf fields through a `fields` arg. + * Allow fast filtering which shapes to read from the file through a `bbox` arg. + + Improvements: + * More examples and restructuring of README. + * More informative Shape to geojson warnings (see #219). + * Add shapefile.VERBOSE flag to control warnings verbosity (default True). + * Shape object information when calling repr(). + * Faster ring orientation checks, enforce geojson output ring orientation. + + Bug fixes: + * Remove null-padding at end of some record character fields. + * Fix dbf writing error when the number of record list or dict entries didn't match the number of fields. 
+ * Handle rare garbage collection issue after deepcopy (https://github.com/mattijn/topojson/issues/120) + * Fix bug where records and shapes would be assigned incorrect record number (@karanrn) + * Fix typos in docs (@timgates) + +VERSION 2.1.3 + +2021-01-14 + Bug fixes: + * Fix recent bug in geojson hole-in-polygon checking (see #205) + * Misc fixes to allow geo interface dump to json (eg dates as strings) + * Handle additional dbf date null values, and return faulty dates as unicode (see #187) + * Add writer target typecheck + * Fix bugs to allow reading shp/shx/dbf separately + * Allow delayed shapefile loading by passing no args + * Fix error with writing empty z/m shapefile (@mcuprjak) + * Fix signed_area() so ignores z/m coords + * Enforce writing the 11th field name character as null-terminator (only first 10 are used) + * Minor README fixes + * Added more tests + +VERSION 2.1.2 + +2020-09-10 + Bug fixes: + * Fix issue where `warnings.simplefilter('always')` changes global warning behavior [see #203] + +VERSION 2.1.1 + +2020-09-09 + Improvements: + * Handle shapes with no coords and represent as geojson with no coords (GeoJSON null-equivalent) + * Expand testing to Python 3.6, 3.7, 3.8 and PyPy; drop 3.3 and 3.4 [@mwtoews] + * Added pytest testing [@jmoujaes] + + Bug fixes: + * Fix incorrect geo interface handling of multipolygons with complex exterior-hole relations [see #202] + * Enforce shapefile requirement of at least one field, to avoid writing invalid shapefiles [@Jonty] + * Fix Reader geo interface including DeletionFlag field in feature properties [@nnseva] + * Fix polygons not being auto closed, which was accidentally dropped + * Fix error for null geometries in feature geojson + * Misc docstring cleanup [@fiveham] + +VERSION 2.1.0 + +2019-02-15 + New Features: + * Added back read/write support for unicode field names. 
+ * Improved Record representation + * More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() + + Bug fixes: + * Fixed error when reading optional m-values + * Fixed Record attribute autocomplete in Python 3 + * Misc readme cleanup + +VERSION 2.0.1 + +2018-11-05 + * Fix pip install setup.py README decoding error. + +VERSION 2.0.0 + +2018-09-01 + (Note: Some contributor attributions may be missing.) + New Features: + * Full support for unicode text, with custom encoding, and exception handling. + - Means that the Reader returns unicode, and the Writer accepts unicode. + * PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. + * Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: + - Specify filepath/destination and text encoding when creating the Writer. + - The file is written incrementally with each call to shape/record. + - Adding shapes is now done using dedicated methods for each shapetype. + * Reading shapefiles is now more convenient: + - Shapefiles can be opened using the context manager, and files are properly closed. + - Shapefiles can be iterated, have a length, and support the geo interface. + - New ways of inspecting shapefile metadata by printing. [@megies] + - More convenient accessing of Record values as attributes. [@philippkraft] + - More convenient shape type name checking. [@megies] + * Add more support and documentation for MultiPatch 3D shapes. + * The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. + * Better documentation of previously unclear aspects, such as field types. + + Bug Fixes: + * More reliable/robust: + - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] + - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. 
[@ShinNoNoir] + - Improved parsing of field value types, fixed errors and made more flexible. + - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] + * Fix some geo interface errors, including checking polygon directions. + * Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] + * Enforce maximum field limit. [@mwtoews] + +VERSION 1.2.12 + * ? + +VERSION 1.2.11 + +2017-04-29 Karim Bahgat + * Fixed bugs when reading and writing empty shapefiles. + * Fixed bug when writing null geometry. + * Fixed misc data type errors. + * Fixed error when reading files with wrong record length. + * Use max field precision when saving decimal numbers. + * Improved shapetype detection. + * Expanded docs on data types. + * General doc additions and travis icon. + +VERSION 1.2.10 + +2016-09-24 Karim Bahgat + * Bump version to fix pip install issue. + +VERSION 1.2.9 + +2016-09-22 Karim Bahgat + * Revert back to fix #66. + +VERSION 1.2.8 + +2016-08-17 Joel Lawhead + * Configured Travis-CI + +VERSION 1.2.5 + +2016-08-16 Joel Lawhead + * Reader speed up through batch unpacking bytes + * Merge README text into markdown file. Remove text version. 
+ * Fixed parsing of number of points for some shapes (MULTIPOINTM, MULTIPOINTZ) + +VERSION 1.2.3 + +2015-06-21 Joel Lawhead + *shapefile.py (u) Bugfix for Python3 with Reader.iterShapeRecords() + +VERSION 1.2.2 + +### upcoming (2015/01/09 05:27 +00:00) +- [#11](https://github.com/geospatialpython/pyshp/pull/11) Merge pull request #11 from 7mp/master (@7mp) +- [#1](https://github.com/geospatialpython/pyshp/pull/1) Merge pull request #1 from rgbkrk/patch-1 (@rgbkrk) +- [#13](https://github.com/geospatialpython/pyshp/pull/13) Merge pull request #13 from jzmiller1/patch-1 (@jzmiller1) +- [#16](https://github.com/geospatialpython/pyshp/pull/16) Merge pull request #16 from riggsd/null-friendly (@riggsd) +- [#17](https://github.com/geospatialpython/pyshp/pull/17) Merge pull request #17 from riggsd/no-asserts (@riggsd) +- [#19](https://github.com/geospatialpython/pyshp/pull/19) Merge pull request #19 from razzius/master (@razzius) +- [#20](https://github.com/geospatialpython/pyshp/pull/20) Merge pull request #20 from Brideau/patch-1 (@Brideau) +- [12d69d4](https://github.com/GeospatialPython/pyshp/commit/12d69d47d8c90b445ea22bf5d9530b0c1c710de5) Updated to version 1.2.1 to match PyPI (@GeospatialPython) +- [05b69dc](https://github.com/GeospatialPython/pyshp/commit/05b69dc6b3d58c0dc9a822f6c4b8d45cf8dc9d94) Updated to version 1.2.1 to match PyPI (@GeospatialPython) +- [d2e9f1a](https://github.com/GeospatialPython/pyshp/commit/d2e9f1a41d02cf932484111f45c31781d1f7385a) Typo: recordsIter should be iterRecords (@Brideau) +- [a965aff](https://github.com/GeospatialPython/pyshp/commit/a965aff230aa3f3b85016f7b627609c7e53a2cf9) Format README code sample (@razzius) +- [66e1802](https://github.com/GeospatialPython/pyshp/commit/66e1802013fd3535baa505e15625afaa895ef819) Raise ShapefileException for runtime errors rather than using `assert()` +- [d72723c](https://github.com/GeospatialPython/pyshp/commit/d72723c9e38db8e859b79d95a65c00af1c2ba8ba) Return None when parsing (illegal) NULL 
attribute values in numeric and date fields, like those produced by QGIS +- [783e68c](https://github.com/GeospatialPython/pyshp/commit/783e68c75b8f20c7656ea470dbc5e9496a8ee0ac) Update link to "XBase File Format Description" (@jzmiller1) +- [79cc409](https://github.com/GeospatialPython/pyshp/commit/79cc409362a24caf4a21923419490ee95d557dc3) Added `Reader.iterShapeRecords` to help work with larger files +- [18c5521](https://github.com/GeospatialPython/pyshp/commit/18c5521b89cd1d7968dff8eb03c1ec37ab4307c5) URL Change (@rgbkrk) +- [202143c](https://github.com/GeospatialPython/pyshp/commit/202143c823407ffea07b5400e77b9ded7169f696) README.md TOC Take 2 +- [2cca75c](https://github.com/GeospatialPython/pyshp/commit/2cca75cd09b27bb19a77ffeb68afc535e3c33802) README.md TOC +- [8b5e994](https://github.com/GeospatialPython/pyshp/commit/8b5e994905fd4a70c0f7ce6d814346e6666b280c) README.md +- [f31a3d7](https://github.com/GeospatialPython/pyshp/commit/f31a3d773dd22e65d3e38ad8b034f186a05b4c4d) Update README.txt (@GeospatialPython) + +VERSION 1.2.1 + +2014-05-11 Joel Lawhead + *shapefile.py (u) fixed bug which failed to properly read some dbf fields in Python 3 + +VERSION 1.2.0 + +2013-09-05 Joel Lawhead + *README.txt add example/test for writing a 3D polygon + +VERSION 1.1.9 + +2013-07-27 Joel Lawhead + *shapefile.py (Writer.__shpRecords) fixed inconsistency between Reader and Writer + when referencing "z" and "m" values. This bug caused errors only when editing + 3D shapefiles. + +VERSION 1.1.8 + +2013-07-02 Joel Lawhead + *shapefile.py (Writer.poly()) fixed a bug that resulted in incorrect part indexes + *README.txt updated several errors in the documentation. + +2013-06-25 Joel Lawhead + *shapefile.py (Reader.shapes(),Reader.iterShapes()) Updated to verify the file length by + seeking to the end. A user reported shapefiles in the wild which had incorrect .shp file + lengths reported in the header which crashed when reading or iterating shapes. 
Most + insist on using the .shx file but there's no real reason to do so. + +VERSION 1.1.7 + +2013-06-22 Joel Lawhead + + *shapefile.py (_Shape.__geo_interface__) Added Python __geo_interface__ convention + to export shapefiles as GeoJSON. + + *shapefile.py (Reader.__init__) Used is_string() method to detect filenames passed + as unicode strings. + + *shapefile.py (Reader.iterShapes) Added iterShapes() method to iterate through + geometry records for parsing large files efficiently. + + *shapefile.py (Reader.iterRecords) Added iterRecords() method to iterate through + dbf records efficiently in large files. + + *shapefile.py (Reader.shape) Modified shape() method to use iterShapes() if shx + file is not available. + + *shapefile.py (main) Added __version__ attribute. + + *shapefile.py (Writer.record) Fixed bug which prevents writing the number 0 to + dbf fields. + + *shapefile.py (Reader.__shape) Updated to calculate and seek the start of the next record. The + shapefile spec does not require the content of a geometry record to be as long as the content + length defined in the header. The result is you can delete features without modifying the + record header allowing for empty space in records. + + *shapefile.py (Writer.poly) Added enforcement of closed polygons + + *shapefile.py (Writer.save) Added unique file name generator to use if no file names are passed + to a writer instance when saving (ex. w.save()). The unique file name is returned as a string. + + *README.txt (main) Added tests for iterShapes(), iterRecords(), __geo_interface__() + + *README.txt (main) Updated "bbox" property documentation to match Esri specification. + + + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..df8e737 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,89 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + + +[tool.ruff] +# Exclude a variety of commonly ignored directories. 
+exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +# Assume Python 3.7 +target-version = "py37" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix` is provided). +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. 
+line-ending = "auto" + + + +[tool.pylint.MASTER] +load-plugins=[ + "pylint_per_file_ignores", +] + +[tool.pylint.'MESSAGES CONTROL'] +# Silence warning: shapefile.py:2076:20: W0212: Access to a protected +# member _from_geojson of a client class (protected-access) +# +# Silence warnings: test_shapefile.py:{783,786,799,803,06,1195}:19: +# W0212: Access to a protected member _offsets of a +# client class (protected-access) +# +# Toml multi-line string used instead of array due to: +# https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 +per-file-ignores = """ + shapefile.py:W0212 + test_shapefile.py:W0212 +""" \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..dbc031b --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +markers = + network: marks tests requiring network access diff --git a/requirements.test.txt b/requirements.test.txt new file mode 100644 index 0000000..1114173 --- /dev/null +++ b/requirements.test.txt @@ -0,0 +1,2 @@ +pytest >= 3.7 +setuptools diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..906abd3 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,30 @@ +[metadata] +name = pyshp +version = attr: shapefile.__version__ +description = Pure Python read/write support for ESRI Shapefile format +long_description = file: README.md +long_description_content_type = text/markdown +author = Joel Lawhead +author_email = jlawhead@geospatialpython.com +maintainer = Karim Bahgat +maintainer_email = karim.bahgat.norway@gmail.com +url = https://github.com/GeospatialPython/pyshp +download_url = https://pypi.org/project/pyshp/ +license = MIT +license_files = LICENSE.TXT +keywords = gis, geospatial, geographic, shapefile, shapefiles +classifiers = + Development Status :: 5 - Production/Stable + Programming Language :: Python + Programming Language :: Python :: 2.7 + Programming Language :: Python :: 3 + Topic :: Scientific/Engineering :: GIS + Topic :: Software Development :: 
Libraries + Topic :: Software Development :: Libraries :: Python Modules + +[options] +py_modules = shapefile +python_requires = >=2.7 + +[bdist_wheel] +universal=1 diff --git a/setup.py b/setup.py index bc72c73..6068493 100644 --- a/setup.py +++ b/setup.py @@ -1,28 +1,3 @@ from setuptools import setup - -def read_file(file): - with open(file, 'rb') as fh: - data = fh.read() - return data.decode('utf-8') - -setup(name='pyshp', - version='2.0.1', - description='Pure Python read/write support for ESRI Shapefile format', - long_description=read_file('README.md'), - long_description_content_type='text/markdown', - author='Joel Lawhead', - author_email='jlawhead@geospatialpython.com', - url='https://github.com/GeospatialPython/pyshp', - download_url='https://github.com/GeospatialPython/pyshp/archive/2.0.1.tar.gz', - py_modules=['shapefile'], - license='MIT', - zip_safe=False, - keywords='gis geospatial geographic shapefile shapefiles', - python_requires='>= 2.7', - classifiers=['Programming Language :: Python', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Topic :: Scientific/Engineering :: GIS', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules']) +setup() diff --git a/shapefile.py b/shapefile.py index f48fca6..2b22ba9 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1,1856 +1,2831 @@ -""" -shapefile.py -Provides read and write support for ESRI Shapefiles. 
-author: jlawheadgeospatialpython.com -version: 2.0.1 -Compatible with Python versions 2.7-3.x -""" - -__version__ = "2.0.1" - -from struct import pack, unpack, calcsize, error, Struct -import os -import sys -import time -import array -import tempfile -import warnings -import io -from datetime import date - - -# Constants for shape types -NULL = 0 -POINT = 1 -POLYLINE = 3 -POLYGON = 5 -MULTIPOINT = 8 -POINTZ = 11 -POLYLINEZ = 13 -POLYGONZ = 15 -MULTIPOINTZ = 18 -POINTM = 21 -POLYLINEM = 23 -POLYGONM = 25 -MULTIPOINTM = 28 -MULTIPATCH = 31 - -SHAPETYPE_LOOKUP = { - 0: 'NULL', - 1: 'POINT', - 3: 'POLYLINE', - 5: 'POLYGON', - 8: 'MULTIPOINT', - 11: 'POINTZ', - 13: 'POLYLINEZ', - 15: 'POLYGONZ', - 18: 'MULTIPOINTZ', - 21: 'POINTM', - 23: 'POLYLINEM', - 25: 'POLYGONM', - 28: 'MULTIPOINTM', - 31: 'MULTIPATCH'} - -TRIANGLE_STRIP = 0 -TRIANGLE_FAN = 1 -OUTER_RING = 2 -INNER_RING = 3 -FIRST_RING = 4 -RING = 5 - -PARTTYPE_LOOKUP = { - 0: 'TRIANGLE_STRIP', - 1: 'TRIANGLE_FAN', - 2: 'OUTER_RING', - 3: 'INNER_RING', - 4: 'FIRST_RING', - 5: 'RING'} - - -# Python 2-3 handling - -PYTHON3 = sys.version_info[0] == 3 - -if PYTHON3: - xrange = range - izip = zip -else: - from itertools import izip - - -# Helpers - -MISSING = [None,''] -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. - -if PYTHON3: - def b(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, str): - # For python 3 encode str to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return b"" - else: - # Force string representation. - return str(v).encode(encoding, encodingErrors) - - def u(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, bytes): - # For python 3 decode bytes to str. - return v.decode(encoding, encodingErrors) - elif isinstance(v, str): - # Already str. 
- return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, str) - -else: - def b(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, unicode): - # For python 2 encode unicode to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return unicode(v).encode(encoding, encodingErrors) - - def u(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, bytes): - # For python 2 decode bytes to unicode. - return v.decode(encoding, encodingErrors) - elif isinstance(v, unicode): - # Already unicode. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return u"" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, basestring) - - -# Begin - -class _Array(array.array): - """Converts python tuples to lits of the appropritate type. - Used to unpack different shapefile header parts.""" - def __repr__(self): - return str(self.tolist()) - -def signed_area(coords): - """Return the signed area enclosed by a ring using the linear time - algorithm. A value >= 0 indicates a counter-clockwise oriented ring. - """ - xs, ys = map(list, zip(*coords)) - xs.append(xs[1]) - ys.append(ys[1]) - return sum(xs[i]*(ys[i+1]-ys[i-1]) for i in range(1, len(coords)))/2.0 - -class Shape(object): - def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None): - """Stores the geometry of the different shape types - specified in the Shapefile spec. Shape types are - usually point, polyline, or polygons. 
Every shape type - except the "Null" type contains points at some level for - example verticies in a polygon. If a shape type has - multiple shapes containing points within a single - geometry record then those shapes are called parts. Parts - are designated by their starting index in geometry record's - list of shapes. For MultiPatch geometry, partTypes designates - the patch type of each of the parts. - """ - self.shapeType = shapeType - self.points = points or [] - self.parts = parts or [] - if partTypes: - self.partTypes = partTypes - - @property - def __geo_interface__(self): - if not self.parts or not self.points: - Exception('Invalid shape, cannot create GeoJSON representation. Shape type is "%s" but does not contain any parts and/or points.' % SHAPETYPE_LOOKUP[self.shapeType]) - - if self.shapeType in [POINT, POINTM, POINTZ]: - return { - 'type': 'Point', - 'coordinates': tuple(self.points[0]) - } - elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: - return { - 'type': 'MultiPoint', - 'coordinates': tuple([tuple(p) for p in self.points]) - } - elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: - if len(self.parts) == 1: - return { - 'type': 'LineString', - 'coordinates': tuple([tuple(p) for p in self.points]) - } - else: - ps = None - coordinates = [] - for part in self.parts: - if ps == None: - ps = part - continue - else: - coordinates.append(tuple([tuple(p) for p in self.points[ps:part]])) - ps = part - else: - coordinates.append(tuple([tuple(p) for p in self.points[part:]])) - return { - 'type': 'MultiLineString', - 'coordinates': tuple(coordinates) - } - elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: - if len(self.parts) == 1: - return { - 'type': 'Polygon', - 'coordinates': (tuple([tuple(p) for p in self.points]),) - } - else: - ps = None - rings = [] - for part in self.parts: - if ps == None: - ps = part - continue - else: - rings.append(tuple([tuple(p) for p in self.points[ps:part]])) - ps = part - else: - 
rings.append(tuple([tuple(p) for p in self.points[part:]])) - polys = [] - poly = [rings[0]] - for ring in rings[1:]: - if signed_area(ring) < 0: - polys.append(poly) - poly = [ring] - else: - poly.append(ring) - polys.append(poly) - if len(polys) == 1: - return { - 'type': 'Polygon', - 'coordinates': tuple(polys[0]) - } - elif len(polys) > 1: - return { - 'type': 'MultiPolygon', - 'coordinates': polys - } - else: - raise Exception('Shape type "%s" cannot be represented as GeoJSON.' % SHAPETYPE_LOOKUP[self.shapeType]) - - @staticmethod - def _from_geojson(geoj): - # create empty shape - shape = Shape() - # set shapeType - geojType = geoj["type"] if geoj else "Null" - if geojType == "Null": - shapeType = NULL - elif geojType == "Point": - shapeType = POINT - elif geojType == "LineString": - shapeType = POLYLINE - elif geojType == "Polygon": - shapeType = POLYGON - elif geojType == "MultiPoint": - shapeType = MULTIPOINT - elif geojType == "MultiLineString": - shapeType = POLYLINE - elif geojType == "MultiPolygon": - shapeType = POLYGON - else: - raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) - shape.shapeType = shapeType - - # set points and parts - if geojType == "Point": - shape.points = [ geoj["coordinates"] ] - shape.parts = [0] - elif geojType in ("MultiPoint","LineString"): - shape.points = geoj["coordinates"] - shape.parts = [0] - elif geojType in ("Polygon"): - points = [] - parts = [] - index = 0 - for i,ext_or_hole in enumerate(geoj["coordinates"]): - if i == 0 and not signed_area(ext_or_hole) < 0: - # flip exterior direction - ext_or_hole = list(reversed(ext_or_hole)) - elif i > 0 and not signed_area(ext_or_hole) >= 0: - # flip hole direction - ext_or_hole = list(reversed(ext_or_hole)) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - shape.points = points - shape.parts = parts - elif geojType in ("MultiLineString"): - points = [] - parts = [] - index = 0 - for linestring in geoj["coordinates"]: - 
points.extend(linestring) - parts.append(index) - index += len(linestring) - shape.points = points - shape.parts = parts - elif geojType in ("MultiPolygon"): - points = [] - parts = [] - index = 0 - for polygon in geoj["coordinates"]: - for i,ext_or_hole in enumerate(polygon): - if i == 0 and not signed_area(ext_or_hole) < 0: - # flip exterior direction - ext_or_hole = list(reversed(ext_or_hole)) - elif i > 0 and not signed_area(ext_or_hole) >= 0: - # flip hole direction - ext_or_hole = list(reversed(ext_or_hole)) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - shape.points = points - shape.parts = parts - return shape - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - -class _Record(list): - """ - A class to hold a record. Subclasses list to ensure compatibility with - former work and allows to use all the optimazations of the builtin list. - In addition to the list interface, the values of the record - can also be retrieved using the fields name. Eg. if the dbf contains - a field ID at position 0, the ID can be retrieved with the position, the field name - as a key or the field name as an attribute. - - >>> # Create a Record with one field, normally the record is created by the Reader class - >>> r = _Record({'ID': 0}, [0]) - >>> print(r[0]) - >>> print(r['ID']) - >>> print(r.ID) - """ - - def __init__(self, field_positions, values, oid=None): - """ - A Record should be created by the Reader class - - :param field_positions: A dict mapping field names to field positions - :param values: A sequence of values - :param oid: The object id, an int (optional) - """ - self.__field_positions = field_positions - if oid is not None: - self.__oid = oid - else: - self.__oid = -1 - list.__init__(self, values) - - def __getattr__(self, item): - """ - __getattr__ is called if an attribute is used that does - not exist in the normal sense. Eg. 
r=Record(...), r.ID - calls r.__getattr__('ID'), but r.index(5) calls list.index(r, 5) - :param item: The field name, used as attribute - :return: Value of the field - :raises: Attribute error, if field does not exist - and IndexError, if field exists but not values in the Record - """ - try: - index = self.__field_positions[item] - return list.__getitem__(self, index) - except KeyError: - raise AttributeError('{} is not a field name'.format(item)) - except IndexError: - raise IndexError('{} found as a field but not enough values available.'.format(item)) - - def __setattr__(self, key, value): - """ - Sets a value of a field attribute - :param key: The field name - :param value: the value of that field - :return: None - :raises: AttributeError, if key is not a field of the shapefile - """ - if key.startswith('_'): # Prevent infinite loop when setting mangled attribute - return list.__setattr__(self, key, value) - try: - index = self.__field_positions[key] - return list.__setitem__(self, index, value) - except KeyError: - raise AttributeError('{} is not a field name'.format(key)) - - def __getitem__(self, item): - """ - Extends the normal list item access with - access using a fieldname - - Eg. r['ID'], r[0] - :param item: Either the position of the value or the name of a field - :return: the value of the field - """ - try: - return list.__getitem__(self, item) - except TypeError: - try: - index = self.__field_positions[item] - except KeyError: - index = None - if index is not None: - return list.__getitem__(self, index) - else: - raise IndexError('"{}" is not a field name and not an int'.format(item)) - - def __setitem__(self, key, value): - """ - Extends the normal list item access with - access using a fieldname - - Eg. 
r['ID']=2, r[0]=2 - :param key: Either the position of the value or the name of a field - :param value: the new value of the field - """ - try: - return list.__setitem__(self, key, value) - except TypeError: - index = self.__field_positions.get(key) - if index is not None: - return list.__setitem__(self, index, value) - else: - raise IndexError('{} is not a field name and not an int'.format(key)) - - @property - def oid(self): - """The index position of the record in the original shapefile""" - return self.__oid - - def as_dict(self): - """ - Returns this Record as a dictionary using the field names as keys - :return: dict - """ - return dict((f, self[i]) for f, i in self.__field_positions.items()) - - def __str__(self): - return 'Record #{} '.format(self.__oid) - - def __dir__(self): - """ - Helps to show the field names in an interactive environment like IPython. - See: http://ipython.readthedocs.io/en/stable/config/integrating.html - - :return: List of method names and fields - """ - attrs = [attr for attr in vars(type(self)) if not attr.startswith('_')] - return attrs + self.__field_positions.values() # plus field names (random order) - -class ShapeRecord(object): - """A ShapeRecord object containing a shape along with its attributes.""" - def __init__(self, shape=None, record=None): - self.shape = shape - self.record = record - -class ShapefileException(Exception): - """An exception to handle shapefile specific problems.""" - pass - -class Reader(object): - """Reads the three files of a shapefile as a unit or - separately. If one of the three files (.shp, .shx, - .dbf) is missing no exception is thrown until you try - to call a method that depends on that particular file. - The .shx index file is used if available for efficiency - but is not required to read the geometry from the .shp - file. The "shapefile" argument in the constructor is the - name of the file you want to open. 
- - You can instantiate a Reader without specifying a shapefile - and then specify one later with the load() method. - - Only the shapefile headers are read upon loading. Content - within each file is only accessed when required and as - efficiently as possible. Shapefiles are usually not large - but they can be. - """ - def __init__(self, *args, **kwargs): - self.shp = None - self.shx = None - self.dbf = None - self.shapeName = "Not specified" - self._offsets = [] - self.shpLength = None - self.numRecords = None - self.fields = [] - self.__dbfHdrLength = 0 - self.__fieldposition_lookup = {} - self.encoding = kwargs.pop('encoding', 'utf-8') - self.encodingErrors = kwargs.pop('encodingErrors', 'strict') - # See if a shapefile name was passed as an argument - if len(args) > 0: - if is_string(args[0]): - self.load(args[0]) - return - if "shp" in kwargs.keys(): - if hasattr(kwargs["shp"], "read"): - self.shp = kwargs["shp"] - # Copy if required - try: - self.shp.seek(0) - except (NameError, io.UnsupportedOperation): - self.shp = io.BytesIO(self.shp.read()) - if "shx" in kwargs.keys(): - if hasattr(kwargs["shx"], "read"): - self.shx = kwargs["shx"] - # Copy if required - try: - self.shx.seek(0) - except (NameError, io.UnsupportedOperation): - self.shx = io.BytesIO(self.shx.read()) - if "dbf" in kwargs.keys(): - if hasattr(kwargs["dbf"], "read"): - self.dbf = kwargs["dbf"] - # Copy if required - try: - self.dbf.seek(0) - except (NameError, io.UnsupportedOperation): - self.dbf = io.BytesIO(self.dbf.read()) - if self.shp or self.dbf: - self.load() - else: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object.") - - def __str__(self): - """ - Use some general info on the shapefile as __str__ - """ - info = ['shapefile Reader'] - if self.shp: - info.append(" {} shapes (type '{}')".format( - len(self), SHAPETYPE_LOOKUP[self.shapeType])) - if self.dbf: - info.append(' {} records ({} fields)'.format( - len(self), len(self.fields))) - return 
'\n'.join(info) - - def __enter__(self): - """ - Enter phase of context manager. - """ - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """ - Exit phase of context manager, close opened files. - """ - self.close() - - def __len__(self): - """Returns the number of shapes/records in the shapefile.""" - return self.numRecords - - def __iter__(self): - """Iterates through the shapes/records in the shapefile.""" - for shaperec in self.iterShapeRecords(): - yield shaperec - - @property - def __geo_interface__(self): - fieldnames = [f[0] for f in self.fields] - features = [] - for feat in self.iterShapeRecords(): - fdict = {'type': 'Feature', - 'properties': dict(*zip(fieldnames, - list(feat.record) - )), - 'geometry': feat.shape.__geo_interface__} - features.append(fdict) - return {'type': 'FeatureCollection', - 'bbox': self.bbox, - 'features': features} - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def load(self, shapefile=None): - """Opens a shapefile from a filename or file-like - object. Normally this method would be called by the - constructor with the file name as an argument.""" - if shapefile: - (shapeName, ext) = os.path.splitext(shapefile) - self.shapeName = shapeName - self.load_shp(shapeName) - self.load_shx(shapeName) - self.load_dbf(shapeName) - if not (self.shp or self.dbf): - raise ShapefileException("Unable to open %s.dbf or %s.shp." 
% (shapeName, shapeName)) - if self.shp: - self.__shpHeader() - if self.dbf: - self.__dbfHeader() - - def load_shp(self, shapefile_name): - """ - Attempts to load file with .shp extension as both lower and upper case - """ - shp_ext = 'shp' - try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") - except IOError: - try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext.upper()), "rb") - except IOError: - pass - - def load_shx(self, shapefile_name): - """ - Attempts to load file with .shx extension as both lower and upper case - """ - shx_ext = 'shx' - try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") - except IOError: - try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext.upper()), "rb") - except IOError: - pass - - def load_dbf(self, shapefile_name): - """ - Attempts to load file with .dbf extension as both lower and upper case - """ - dbf_ext = 'dbf' - try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") - except IOError: - try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext.upper()), "rb") - except IOError: - pass - - def __del__(self): - self.close() - - def close(self): - for attribute in (self.shp, self.shx, self.dbf): - if hasattr(attribute, 'close'): - try: - attribute.close() - except IOError: - pass - - def __getFileObj(self, f): - """Checks to see if the requested shapefile file object is - available. 
If not a ShapefileException is raised.""" - if not f: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object.") - if self.shp and self.shpLength is None: - self.load() - if self.dbf and len(self.fields) == 0: - self.load() - return f - - def __restrictIndex(self, i): - """Provides list-like handling of a record index with a clearer - error message if the index is out of bounds.""" - if self.numRecords: - rmax = self.numRecords - 1 - if abs(i) > rmax: - raise IndexError("Shape or Record index out of range.") - if i < 0: i = range(self.numRecords)[i] - return i - - def __shpHeader(self): - """Reads the header information from a .shp or .shx file.""" - if not self.shp: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shp file found") - shp = self.shp - # File length (16-bit word * 2 = bytes) - shp.seek(24) - self.shpLength = unpack(">i", shp.read(4))[0] * 2 - # Shape type - shp.seek(32) - self.shapeType= unpack(" NODATA: - self.mbox.append(m) - else: - self.mbox.append(None) - - def __shape(self): - """Returns the header info and geometry for a single shape.""" - f = self.__getFileObj(self.shp) - record = Shape() - nParts = nPoints = zmin = zmax = mmin = mmax = None - (recNum, recLength) = unpack(">2i", f.read(8)) - # Determine the start of the next record - next = f.tell() + (2 * recLength) - shapeType = unpack(" NODATA: - record.m.append(m) - else: - record.m.append(None) - # Read a single point - if shapeType in (1,11,21): - record.points = [_Array('d', unpack("<2d", f.read(16)))] - # Read a single Z value - if shapeType == 11: - record.z = list(unpack(" NODATA: - record.m = [m] - else: - record.m = [None] - # Seek to the end of this record as defined by the record header because - # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. 
- f.seek(next) - return record - - def __shapeIndex(self, i=None): - """Returns the offset in a .shp file for a shape based on information - in the .shx index file.""" - shx = self.shx - if not shx: - return None - if not self._offsets: - # File length (16-bit word * 2 = bytes) - header length - shx.seek(24) - shxRecordLength = (unpack(">i", shx.read(4))[0] * 2) - 100 - numRecords = shxRecordLength // 8 - # Jump to the first record. - shx.seek(100) - shxRecords = _Array('i') - # Each offset consists of two nrs, only the first one matters - shxRecords.fromfile(shx, 2 * numRecords) - if sys.byteorder != 'big': - shxRecords.byteswap() - self._offsets = [2 * el for el in shxRecords[::2]] - if not i == None: - return self._offsets[i] - - def shape(self, i=0): - """Returns a shape object for a shape in the the geometry - record file.""" - shp = self.__getFileObj(self.shp) - i = self.__restrictIndex(i) - offset = self.__shapeIndex(i) - if not offset: - # Shx index not available so iterate the full list. - for j,k in enumerate(self.iterShapes()): - if j == i: - return k - shp.seek(offset) - return self.__shape() - - def shapes(self): - """Returns all shapes in a shapefile.""" - shp = self.__getFileObj(self.shp) - # Found shapefiles which report incorrect - # shp file length in the header. Can't trust - # that so we seek to the end of the file - # and figure it out. - shp.seek(0,2) - self.shpLength = shp.tell() - shp.seek(100) - shapes = [] - while shp.tell() < self.shpLength: - shapes.append(self.__shape()) - return shapes - - def iterShapes(self): - """Serves up shapes in a shapefile as an iterator. Useful - for handling large shapefiles.""" - shp = self.__getFileObj(self.shp) - shp.seek(0,2) - self.shpLength = shp.tell() - shp.seek(100) - while shp.tell() < self.shpLength: - yield self.__shape() - - def __dbfHeader(self): - """Reads a dbf header. 
Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" - if not self.dbf: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no dbf file found)") - dbf = self.dbf - # read relevant header parts - self.numRecords, self.__dbfHdrLength, self.__recordLength = \ - unpack(" 0: - px, py = list(zip(*s.points))[:2] - x.extend(px) - y.extend(py) - else: - # this should not happen. - # any shape that is not null should have at least one point, and only those should be sent here. - # could also mean that earlier code failed to add points to a non-null shape. - raise Exception("Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." % s.shapeType) - bbox = [min(x), min(y), max(x), max(y)] - # update global - if self._bbox: - # compare with existing - self._bbox = [min(bbox[0],self._bbox[0]), min(bbox[1],self._bbox[1]), max(bbox[2],self._bbox[2]), max(bbox[3],self._bbox[3])] - else: - # first time bbox is being set - self._bbox = bbox - return bbox - - def __zbox(self, s): - z = [] - for p in s.points: - try: - z.append(p[2]) - except IndexError: - # point did not have z value - # setting it to 0 is probably ok, since it means all are on the same elavation - z.append(0) - zbox = [min(z), max(z)] - # update global - if self._zbox: - # compare with existing - self._zbox = [min(zbox[0],self._zbox[0]), max(zbox[1],self._zbox[1])] - else: - # first time zbox is being set - self._zbox = zbox - return zbox - - def __mbox(self, s): - mpos = 3 if s.shapeType in (11,13,15,18,31) else 2 - m = [] - for p in s.points: - try: - if p[mpos] is not None: - # mbox should only be calculated on valid m values - m.append(p[mpos]) - except IndexError: - # point did not have m value so is missing - # mbox should only be calculated on valid m values - pass - if not m: - # only if none of the shapes had m values, should mbox be set to missing m values - 
m.append(NODATA) - mbox = [min(m), max(m)] - # update global - if self._mbox: - # compare with existing - self._mbox = [min(mbox[0],self._mbox[0]), max(mbox[1],self._mbox[1])] - else: - # first time mbox is being set - self._mbox = mbox - return mbox - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def bbox(self): - """Returns the current bounding box for the shapefile which is - the lower-left and upper-right corners. It does not contain the - elevation or measure extremes.""" - return self._bbox - - def zbox(self): - """Returns the current z extremes for the shapefile.""" - return self._zbox - - def mbox(self): - """Returns the current m extremes for the shapefile.""" - return self._mbox - - def __shapefileHeader(self, fileObj, headerType='shp'): - """Writes the specified header type to the specified file-like object. - Several of the shapefile formats are so similar that a single generic - method to read or write them is warranted.""" - f = self.__getFileObj(fileObj) - f.seek(0) - # File code, Unused bytes - f.write(pack(">6i", 9994,0,0,0,0,0)) - # File length (Bytes / 2 = 16-bit words) - if headerType == 'shp': - f.write(pack(">i", self.__shpFileLength())) - elif headerType == 'shx': - f.write(pack('>i', ((100 + (self.shpNum * 8)) // 2))) - # Version, Shape type - if self.shapeType is None: - self.shapeType = NULL - f.write(pack("<2i", 1000, self.shapeType)) - # The shapefile's bounding box (lower left, upper right) - if self.shapeType != 0: - try: - bbox = self.bbox() - if bbox is None: - # The bbox is initialized with None, so this would mean the shapefile contains no valid geometries. - # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. - # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. - # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. 
- bbox = [0,0,0,0] - f.write(pack("<4d", *bbox)) - except error: - raise ShapefileException("Failed to write shapefile bounding box. Floats required.") - else: - f.write(pack("<4d", 0,0,0,0)) - # Elevation - if self.shapeType in (11,13,15,18): - # Z values are present in Z type - zbox = self.zbox() - else: - # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s - zbox = [0,0] - # Measure - if self.shapeType in (11,13,15,18,21,23,25,28,31): - # M values are present in M or Z type - mbox = self.mbox() - else: - # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s - mbox = [0,0] - # Try writing - try: - f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) - except error: - raise ShapefileException("Failed to write shapefile elevation and measure values. Floats required.") - - def __dbfHeader(self): - """Writes the dbf header and field descriptors.""" - f = self.__getFileObj(self.dbf) - f.seek(0) - version = 3 - year, month, day = time.localtime()[:3] - year -= 1900 - # Remove deletion flag placeholder from fields - for field in self.fields: - if str(field[0]).startswith("Deletion"): - self.fields.remove(field) - numRecs = self.recNum - numFields = len(self.fields) - headerLength = numFields * 32 + 33 - if headerLength >= 65535: - raise ShapefileException( - "Shapefile dbf header length exceeds maximum length.") - recordLength = sum([int(field[2]) for field in self.fields]) + 1 - header = pack('2i", self.shpNum, 0)) - start = f.tell() - # Shape Type - if self.shapeType is None and s.shapeType != NULL: - self.shapeType = s.shapeType - if s.shapeType != NULL and s.shapeType != self.shapeType: - raise Exception("The shape's type (%s) must match the type of the shapefile (%s)." % (s.shapeType, self.shapeType)) - f.write(pack(" 2 else 0)) for p in s.points] - except error: - raise ShapefileException("Failed to write elevation values for record %s. Expected floats." 
% self.shpNum) - # Write m extremes and values - # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA - # Note: missing m values are autoset to NODATA. - if s.shapeType in (13,15,18,23,25,28,31): - try: - f.write(pack("<2d", *self.__mbox(s))) - except error: - raise ShapefileException("Failed to write measure extremes for record %s. Expected floats" % self.shpNum) - try: - if hasattr(s,"m"): - # if m values are stored in attribute - f.write(pack("<%sd" % len(s.m), *[m if m is not None else NODATA for m in s.m])) - else: - # if m values are stored as 3rd/4th dimension - # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - mpos = 3 if s.shapeType in (13,15,18,31) else 2 - [f.write(pack(" mpos and p[mpos] is not None else NODATA)) for p in s.points] - except error: - raise ShapefileException("Failed to write measure values for record %s. Expected floats" % self.shpNum) - # Write a single point - if s.shapeType in (1,11,21): - try: - f.write(pack("<2d", s.points[0][0], s.points[0][1])) - except error: - raise ShapefileException("Failed to write point for record %s. Expected floats." % self.shpNum) - # Write a single Z value - # Note: missing z values are autoset to 0, but not sure if this is ideal. - if s.shapeType == 11: - # update the global z box - self.__zbox(s) - # then write value - if hasattr(s, "z"): - # if z values are stored in attribute - try: - if not s.z: - s.z = (0,) - f.write(pack("i", length)) - f.seek(finish) - return offset,length - - def __shxRecord(self, offset, length): - """Writes the shx records.""" - f = self.__getFileObj(self.shx) - f.write(pack(">i", offset // 2)) - f.write(pack(">i", length)) - - def record(self, *recordList, **recordDict): - """Creates a dbf attribute record. You can submit either a sequence of - field values or keyword arguments of field names and values. 
Before - adding records you must add fields for the record values using the - fields() method. If the record values exceed the number of fields the - extra ones won't be added. In the case of using keyword arguments to specify - field/value pairs only fields matching the already registered fields - will be added.""" - # Balance if already not balanced - if self.autoBalance and self.recNum > self.shpNum: - self.balance() - - record = [] - fieldCount = len(self.fields) - # Compensate for deletion flag - if self.fields[0][0].startswith("Deletion"): fieldCount -= 1 - if recordList: - record = [recordList[i] for i in range(fieldCount)] - elif recordDict: - for field in self.fields: - if field[0] in recordDict: - val = recordDict[field[0]] - if val is None: - record.append("") - else: - record.append(val) - else: - # Blank fields for empty record - record = ["" for i in range(fieldCount)] - self.__dbfRecord(record) - - def __dbfRecord(self, record): - """Writes the dbf records.""" - f = self.__getFileObj(self.dbf) - if self.recNum == 0: - # first records, so all fields should be set - # allowing us to write the dbf header - # cannot change the fields after this point - self.__dbfHeader() - # begin - self.recNum += 1 - if not self.fields[0][0].startswith("Deletion"): - f.write(b' ') # deletion flag - for (fieldName, fieldType, size, deci), value in zip(self.fields, record): - fieldType = fieldType.upper() - size = int(size) - if fieldType in ("N","F"): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - if value in MISSING: - value = b"*"*size # QGIS NULL - elif not deci: - # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. 
- value = int(float(value)) - value = format(value, "d")[:size].rjust(size) # caps the size if exceeds the field size - else: - value = float(value) - value = format(value, ".%sf"%deci)[:size].rjust(size) # caps the size if exceeds the field size - elif fieldType == "D": - # date: 8 bytes - date stored as a string in the format YYYYMMDD. - if isinstance(value, date): - value = '{:04d}{:02d}{:02d}'.format(value.year, value.month, value.day) - elif isinstance(value, list) and len(value) == 3: - value = '{:04d}{:02d}{:02d}'.format(*value) - elif value in MISSING: - value = b'0' * 8 # QGIS NULL for date type - elif is_string(value) and len(value) == 8: - pass # value is already a date string - else: - raise ShapefileException("Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value.") - elif fieldType == 'L': - # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. - if value in MISSING: - value = b' ' # missing is set to space - elif value in [True,1]: - value = b'T' - elif value in [False,0]: - value = b'F' - else: - value = b' ' # unknown is set to space - else: - # anything else is forced to string, truncated to the length of the field - value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) - if not isinstance(value, bytes): - # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) - value = b(value, 'ascii', self.encodingErrors) # should be default ascii encoding - if len(value) != size: - raise ShapefileException( - "Shapefile Writer unable to pack incorrect sized value" - " (size %d) into field '%s' (size %d)." 
% (len(value), fieldName, size)) - f.write(value) - - def balance(self): - """Adds corresponding empty attributes or null geometry records depending - on which type of record was created to make sure all three files - are in synch.""" - while self.recNum > self.shpNum: - self.null() - while self.recNum < self.shpNum: - self.record() - - - def null(self): - """Creates a null shape.""" - self.shape(Shape(NULL)) - - - def point(self, x, y): - """Creates a POINT shape.""" - shapeType = POINT - pointShape = Shape(shapeType) - pointShape.points.append([x, y]) - self.shape(pointShape) - - def pointm(self, x, y, m=None): - """Creates a POINTM shape. - If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTM - pointShape = Shape(shapeType) - pointShape.points.append([x, y, m]) - self.shape(pointShape) - - def pointz(self, x, y, z=0, m=None): - """Creates a POINTZ shape. - If the z (elevation) value is not set, it defaults to 0. - If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTZ - pointShape = Shape(shapeType) - pointShape.points.append([x, y, z, m]) - self.shape(pointShape) - - - def multipoint(self, points): - """Creates a MULTIPOINT shape. - Points is a list of xy values.""" - shapeType = MULTIPOINT - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def multipointm(self, points): - """Creates a MULTIPOINTM shape. - Points is a list of xym values. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTM - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def multipointz(self, points): - """Creates a MULTIPOINTZ shape. - Points is a list of xyzm values. - If the z (elevation) value is not included, it defaults to 0. 
- If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTZ - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - - def line(self, lines): - """Creates a POLYLINE shape. - Lines is a collection of lines, each made up of a list of xy values.""" - shapeType = POLYLINE - self._shapeparts(parts=lines, shapeType=shapeType) - - def linem(self, lines): - """Creates a POLYLINEM shape. - Lines is a collection of lines, each made up of a list of xym values. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEM - self._shapeparts(parts=lines, shapeType=shapeType) - - def linez(self, lines): - """Creates a POLYLINEZ shape. - Lines is a collection of lines, each made up of a list of xyzm values. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEZ - self._shapeparts(parts=lines, shapeType=shapeType) - - - def poly(self, polys): - """Creates a POLYGON shape. - Polys is a collection of polygons, each made up of a list of xy values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction.""" - shapeType = POLYGON - self._shapeparts(parts=polys, shapeType=shapeType) - - def polym(self, polys): - """Creates a POLYGONM shape. - Polys is a collection of polygons, each made up of a list of xym values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction. 
- If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONM - self._shapeparts(parts=polys, shapeType=shapeType) - - def polyz(self, polys): - """Creates a POLYGONZ shape. - Polys is a collection of polygons, each made up of a list of xyzm values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONZ - self._shapeparts(parts=polys, shapeType=shapeType) - - - def multipatch(self, parts, partTypes): - """Creates a MULTIPATCH shape. - Parts is a collection of 3D surface patches, each made up of a list of xyzm values. - PartTypes is a list of types that define each of the surface patches. - The types can be any of the following module constants: TRIANGLE_STRIP, - TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. - If the z (elavation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPATCH - polyShape = Shape(shapeType) - polyShape.parts = [] - polyShape.points = [] - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) - polyShape.partTypes = partTypes - # write the shape - self.shape(polyShape) - - - def _shapeparts(self, parts, shapeType): - """Internal method for adding a shape that has multiple collections of points (parts): - lines, polygons, and multipoint shapes. 
- """ - polyShape = Shape(shapeType) - polyShape.parts = [] - polyShape.points = [] - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) - # write the shape - self.shape(polyShape) - - def field(self, name, fieldType="C", size="50", decimal=0): - """Adds a dbf field descriptor to the shapefile.""" - if fieldType == "D": - size = "8" - decimal = 0 - elif fieldType == "L": - size = "1" - decimal = 0 - if len(self.fields) >= 2046: - raise ShapefileException( - "Shapefile Writer reached maximum number of fields: 2046.") - self.fields.append((name, fieldType, size, decimal)) - -## def saveShp(self, target): -## """Save an shp file.""" -## if not hasattr(target, "write"): -## target = os.path.splitext(target)[0] + '.shp' -## self.shp = self.__getFileObj(target) -## self.__shapefileHeader(self.shp, headerType='shp') -## self.shp.seek(100) -## self._shp.seek(0) -## chunk = True -## while chunk: -## chunk = self._shp.read(self.bufsize) -## self.shp.write(chunk) -## -## def saveShx(self, target): -## """Save an shx file.""" -## if not hasattr(target, "write"): -## target = os.path.splitext(target)[0] + '.shx' -## self.shx = self.__getFileObj(target) -## self.__shapefileHeader(self.shx, headerType='shx') -## self.shx.seek(100) -## self._shx.seek(0) -## chunk = True -## while chunk: -## chunk = self._shx.read(self.bufsize) -## self.shx.write(chunk) -## -## def saveDbf(self, target): -## """Save a dbf file.""" -## if not hasattr(target, "write"): -## target = os.path.splitext(target)[0] + '.dbf' -## self.dbf = self.__getFileObj(target) -## self.__dbfHeader() # writes to .dbf -## self._dbf.seek(0) -## chunk = True -## while chunk: -## chunk = self._dbf.read(self.bufsize) -## self.dbf.write(chunk) - -## def save(self, target=None, shp=None, shx=None, dbf=None): -## """Save the shapefile 
data to three files or -## three file-like objects. SHP and DBF files can also -## be written exclusively using saveShp, saveShx, and saveDbf respectively. -## If target is specified but not shp, shx, or dbf then the target path and -## file name are used. If no options or specified, a unique base file name -## is generated to save the files and the base file name is returned as a -## string. -## """ -## # Balance if already not balanced -## if shp and dbf: -## if self.autoBalance: -## self.balance() -## if self.recNum != self.shpNum: -## raise ShapefileException("When saving both the dbf and shp file, " -## "the number of records (%s) must correspond " -## "with the number of shapes (%s)" % (self.recNum, self.shpNum)) -## # Save -## if shp: -## self.saveShp(shp) -## if shx: -## self.saveShx(shx) -## if dbf: -## self.saveDbf(dbf) -## # Create a unique file name if one is not defined -## if not shp and not shx and not dbf: -## generated = False -## if not target: -## temp = tempfile.NamedTemporaryFile(prefix="shapefile_",dir=os.getcwd()) -## target = temp.name -## generated = True -## self.saveShp(target) -## self.shp.close() -## self.saveShx(target) -## self.shx.close() -## self.saveDbf(target) -## self.dbf.close() -## if generated: -## return target - -# Begin Testing -def test(**kwargs): - import doctest - doctest.NORMALIZE_WHITESPACE = 1 - verbosity = kwargs.get('verbose', 0) - if verbosity == 0: - print('Running doctests...') - - # ignore py2-3 unicode differences - import re - class Py23DocChecker(doctest.OutputChecker): - def check_output(self, want, got, optionflags): - if sys.version_info[0] == 2: - got = re.sub("u'(.*?)'", "'\\1'", got) - got = re.sub('u"(.*?)"', '"\\1"', got) - res = doctest.OutputChecker.check_output(self, want, got, optionflags) - return res - def summarize(self): - doctest.OutputChecker.summarize(True) - - # run tests - runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity) - with open("README.md","rb") as fobj: - 
test = doctest.DocTestParser().get_doctest(string=fobj.read().decode("utf8"), globs={}, name="README", filename="README.md", lineno=0) - failure_count, test_count = runner.run(test) - - # print results - if verbosity: - runner.summarize(True) - else: - if failure_count == 0: - print('All test passed successfully') - elif failure_count > 0: - runner.summarize(verbosity) - - return failure_count - -if __name__ == "__main__": - """ - Doctests are contained in the file 'README.md', and are tested using the built-in - testing libraries. - """ - failure_count = test() - sys.exit(failure_count) +""" +shapefile.py +Provides read and write support for ESRI Shapefiles. +authors: jlawheadgeospatialpython.com +maintainer: karim.bahgat.norwaygmail.com +Compatible with Python versions 2.7-3.x +""" + +__version__ = "2.3.1" + +import array +import io +import logging +import os +import sys +import tempfile +import time +import zipfile +from datetime import date +from struct import Struct, calcsize, error, pack, unpack + +# Create named logger +logger = logging.getLogger(__name__) + +# Module settings +VERBOSE = True + +# Constants for shape types +NULL = 0 +POINT = 1 +POLYLINE = 3 +POLYGON = 5 +MULTIPOINT = 8 +POINTZ = 11 +POLYLINEZ = 13 +POLYGONZ = 15 +MULTIPOINTZ = 18 +POINTM = 21 +POLYLINEM = 23 +POLYGONM = 25 +MULTIPOINTM = 28 +MULTIPATCH = 31 + +SHAPETYPE_LOOKUP = { + 0: "NULL", + 1: "POINT", + 3: "POLYLINE", + 5: "POLYGON", + 8: "MULTIPOINT", + 11: "POINTZ", + 13: "POLYLINEZ", + 15: "POLYGONZ", + 18: "MULTIPOINTZ", + 21: "POINTM", + 23: "POLYLINEM", + 25: "POLYGONM", + 28: "MULTIPOINTM", + 31: "MULTIPATCH", +} + +TRIANGLE_STRIP = 0 +TRIANGLE_FAN = 1 +OUTER_RING = 2 +INNER_RING = 3 +FIRST_RING = 4 +RING = 5 + +PARTTYPE_LOOKUP = { + 0: "TRIANGLE_STRIP", + 1: "TRIANGLE_FAN", + 2: "OUTER_RING", + 3: "INNER_RING", + 4: "FIRST_RING", + 5: "RING", +} + + +# Python 2-3 handling + +PYTHON3 = sys.version_info[0] == 3 + +if PYTHON3: + xrange = range + izip = zip + + from urllib.error 
import HTTPError + from urllib.parse import urlparse, urlunparse + from urllib.request import Request, urlopen + +else: + from itertools import izip + + from urllib2 import HTTPError, Request, urlopen + from urlparse import urlparse, urlunparse + + +# Helpers + +MISSING = [None, ""] +NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. + +if PYTHON3: + + def b(v, encoding="utf-8", encodingErrors="strict"): + if isinstance(v, str): + # For python 3 encode str to bytes. + return v.encode(encoding, encodingErrors) + elif isinstance(v, bytes): + # Already bytes. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return b"" + else: + # Force string representation. + return str(v).encode(encoding, encodingErrors) + + def u(v, encoding="utf-8", encodingErrors="strict"): + if isinstance(v, bytes): + # For python 3 decode bytes to str. + return v.decode(encoding, encodingErrors) + elif isinstance(v, str): + # Already str. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return "" + else: + # Force string representation. + return bytes(v).decode(encoding, encodingErrors) + + def is_string(v): + return isinstance(v, str) + +else: + + def b(v, encoding="utf-8", encodingErrors="strict"): + if isinstance(v, unicode): + # For python 2 encode unicode to bytes. + return v.encode(encoding, encodingErrors) + elif isinstance(v, bytes): + # Already bytes. + return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return "" + else: + # Force string representation. + return unicode(v).encode(encoding, encodingErrors) + + def u(v, encoding="utf-8", encodingErrors="strict"): + if isinstance(v, bytes): + # For python 2 decode bytes to unicode. + return v.decode(encoding, encodingErrors) + elif isinstance(v, unicode): + # Already unicode. 
+ return v + elif v is None: + # Since we're dealing with text, interpret None as "" + return "" + else: + # Force string representation. + return bytes(v).decode(encoding, encodingErrors) + + def is_string(v): + return isinstance(v, basestring) + + +if sys.version_info[0:2] >= (3, 6): + + def pathlike_obj(path): + if isinstance(path, os.PathLike): + return os.fsdecode(path) + else: + return path +else: + + def pathlike_obj(path): + if is_string(path): + return path + elif hasattr(path, "__fspath__"): + return path.__fspath__() + else: + try: + return str(path) + except: + return path + + +# Begin + + +class _Array(array.array): + """Converts python tuples to lists of the appropriate type. + Used to unpack different shapefile header parts.""" + + def __repr__(self): + return str(self.tolist()) + + +def signed_area(coords, fast=False): + """Return the signed area enclosed by a ring using the linear time + algorithm. A value >= 0 indicates a counter-clockwise oriented ring. + A faster version is possible by setting 'fast' to True, which returns + 2x the area, e.g. if you're only interested in the sign of the area. + """ + xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values + xs.append(xs[1]) + ys.append(ys[1]) + area2 = sum(xs[i] * (ys[i + 1] - ys[i - 1]) for i in range(1, len(coords))) + if fast: + return area2 + else: + return area2 / 2.0 + + +def is_cw(coords): + """Returns True if a polygon ring has clockwise orientation, determined + by a negatively signed area. 
+ """ + area2 = signed_area(coords, fast=True) + return area2 < 0 + + +def rewind(coords): + """Returns the input coords in reversed order.""" + return list(reversed(coords)) + + +def ring_bbox(coords): + """Calculates and returns the bounding box of a ring.""" + xs, ys = zip(*coords) + bbox = min(xs), min(ys), max(xs), max(ys) + return bbox + + +def bbox_overlap(bbox1, bbox2): + """Tests whether two bounding boxes overlap, returning a boolean""" + xmin1, ymin1, xmax1, ymax1 = bbox1 + xmin2, ymin2, xmax2, ymax2 = bbox2 + overlap = xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2 + return overlap + + +def bbox_contains(bbox1, bbox2): + """Tests whether bbox1 fully contains bbox2, returning a boolean""" + xmin1, ymin1, xmax1, ymax1 = bbox1 + xmin2, ymin2, xmax2, ymax2 = bbox2 + contains = xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2 + return contains + + +def ring_contains_point(coords, p): + """Fast point-in-polygon crossings algorithm, MacMartin optimization. + + Adapted from code by Eric Haynes + http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c + + Original description: + Shoot a test ray along +X axis. The strategy, from MacMartin, is to + compare vertex Y values to the testing point's Y and quickly discard + edges which are entirely to one side of the test ray. + """ + tx, ty = p + + # get initial test bit for above/below X axis + vtx0 = coords[0] + yflag0 = vtx0[1] >= ty + + inside_flag = False + for vtx1 in coords[1:]: + yflag1 = vtx1[1] >= ty + # check if endpoints straddle (are on opposite sides) of X axis + # (i.e. the Y's differ); if so, +X ray could intersect this edge. + if yflag0 != yflag1: + xflag0 = vtx0[0] >= tx + # check if endpoints are on same side of the Y axis (i.e. X's + # are the same); if so, it's easy to test if edge hits or misses. 
+ if xflag0 == (vtx1[0] >= tx): + # if edge's X values both right of the point, must hit + if xflag0: + inside_flag = not inside_flag + else: + # compute intersection of pgon segment with +X ray, note + # if >= point's X; if so, the ray hits it. + if ( + vtx1[0] - (vtx1[1] - ty) * (vtx0[0] - vtx1[0]) / (vtx0[1] - vtx1[1]) + ) >= tx: + inside_flag = not inside_flag + + # move to next pair of vertices, retaining info as possible + yflag0 = yflag1 + vtx0 = vtx1 + + return inside_flag + + +def ring_sample(coords, ccw=False): + """Return a sample point guaranteed to be within a ring, by efficiently + finding the first centroid of a coordinate triplet whose orientation + matches the orientation of the ring and passes the point-in-ring test. + The orientation of the ring is assumed to be clockwise, unless ccw + (counter-clockwise) is set to True. + """ + triplet = [] + + def itercoords(): + # iterate full closed ring + for p in coords: + yield p + # finally, yield the second coordinate to the end to allow checking the last triplet + yield coords[1] + + for p in itercoords(): + # add point to triplet (but not if duplicate) + if p not in triplet: + triplet.append(p) + + # new triplet, try to get sample + if len(triplet) == 3: + # check that triplet does not form a straight line (not a triangle) + is_straight_line = (triplet[0][1] - triplet[1][1]) * ( + triplet[0][0] - triplet[2][0] + ) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) + if not is_straight_line: + # get triplet orientation + closed_triplet = triplet + [triplet[0]] + triplet_ccw = not is_cw(closed_triplet) + # check that triplet has the same orientation as the ring (means triangle is inside the ring) + if ccw == triplet_ccw: + # get triplet centroid + xs, ys = zip(*triplet) + xmean, ymean = sum(xs) / 3.0, sum(ys) / 3.0 + # check that triplet centroid is truly inside the ring + if ring_contains_point(coords, (xmean, ymean)): + return xmean, ymean + + # failed to get sample point from this 
triplet + # remove oldest triplet coord to allow iterating to next triplet + triplet.pop(0) + + else: + raise Exception("Unexpected error: Unable to find a ring sample point.") + + +def ring_contains_ring(coords1, coords2): + """Returns True if all vertexes in coords2 are fully inside coords1.""" + return all((ring_contains_point(coords1, p2) for p2 in coords2)) + + +def organize_polygon_rings(rings, return_errors=None): + """Organize a list of coordinate rings into one or more polygons with holes. + Returns a list of polygons, where each polygon is composed of a single exterior + ring, and one or more interior holes. If a return_errors dict is provided (optional), + any errors encountered will be added to it. + + Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). + Rings are determined as exteriors if they run in clockwise direction, or interior + holes if they run in counter-clockwise direction. This method is used to construct + GeoJSON (multi)polygons from the shapefile polygon shape type, which does not + explicitly store the structure of the polygons beyond exterior/interior ring orientation. 
+ """ + # first iterate rings and classify as exterior or hole + exteriors = [] + holes = [] + for ring in rings: + # shapefile format defines a polygon as a sequence of rings + # where exterior rings are clockwise, and holes counterclockwise + if is_cw(ring): + # ring is exterior + exteriors.append(ring) + else: + # ring is a hole + holes.append(ring) + + # if only one exterior, then all holes belong to that exterior + if len(exteriors) == 1: + # exit early + poly = [exteriors[0]] + holes + polys = [poly] + return polys + + # multiple exteriors, ie multi-polygon, have to group holes with correct exterior + # shapefile format does not specify which holes belong to which exteriors + # so have to do efficient multi-stage checking of hole-to-exterior containment + elif len(exteriors) > 1: + # exit early if no holes + if not holes: + polys = [] + for ext in exteriors: + poly = [ext] + polys.append(poly) + return polys + + # first determine each hole's candidate exteriors based on simple bbox contains test + hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) + exterior_bboxes = [ring_bbox(ring) for ring in exteriors] + for hole_i in hole_exteriors.keys(): + hole_bbox = ring_bbox(holes[hole_i]) + for ext_i, ext_bbox in enumerate(exterior_bboxes): + if bbox_contains(ext_bbox, hole_bbox): + hole_exteriors[hole_i].append(ext_i) + + # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test + for hole_i, exterior_candidates in hole_exteriors.items(): + if len(exterior_candidates) > 1: + # get hole sample point + ccw = not is_cw(holes[hole_i]) + hole_sample = ring_sample(holes[hole_i], ccw=ccw) + # collect new exterior candidates + new_exterior_candidates = [] + for ext_i in exterior_candidates: + # check that hole sample point is inside exterior + hole_in_exterior = ring_contains_point( + exteriors[ext_i], hole_sample + ) + if hole_in_exterior: + new_exterior_candidates.append(ext_i) + + # set new exterior candidates 
class Shape(object):
    """Geometry of a single shapefile record.

    Keeps the shape type code, the flat list of points and the start index
    of every part inside that list. Converts to a GeoJSON-like dict through
    ``__geo_interface__`` and from one through ``_from_geojson``.
    """

    def __init__(
        self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None
    ):
        """Stores the geometry of the different shape types
        specified in the Shapefile spec. Shape types are
        usually point, polyline, or polygons. Every shape type
        except the "Null" type contains points at some level for
        example vertices in a polygon. If a shape type has
        multiple shapes containing points within a single
        geometry record then those shapes are called parts. Parts
        are designated by their starting index in geometry record's
        list of shapes. For MultiPatch geometry, partTypes designates
        the patch type of each of the parts.

        :param shapeType: Shape type code, defaults to NULL
        :param points: List of points, defaults to an empty list
        :param parts: Start index of each part in ``points``, defaults to an empty list
        :param partTypes: Patch type per part; the attribute is only set when given
        :param oid: Index of the shape in its shapefile; stored as -1 when omitted
        """
        self.shapeType = shapeType
        self.points = points or []
        self.parts = parts or []
        if partTypes:
            self.partTypes = partTypes

        # and a dict to silently record any errors encountered
        self._errors = {}

        # add oid, -1 being the placeholder when none was given
        self.__oid = oid if oid is not None else -1

    def _split_parts(self):
        """Return the points of every part as a list of lists of coordinate tuples."""
        starts = list(self.parts)
        # each part ends where the next one starts, the last one at the final point
        ends = starts[1:] + [len(self.points)]
        return [
            [tuple(p) for p in self.points[start:end]]
            for start, end in zip(starts, ends)
        ]

    @property
    def __geo_interface__(self):
        """The shape as a GeoJSON-like geometry dict.

        Shapes without coordinates are returned as the matching geometry type
        with empty coordinates, since the geojson spec does not define a
        proper null-geometry type but allows 'empty' coordinates to be
        interpreted as null-geometries.

        :raises Exception: if the shape type is not a point, multipoint,
            polyline or polygon type
        """
        if self.shapeType in (POINT, POINTM, POINTZ):
            if len(self.points) == 0:
                return {"type": "Point", "coordinates": tuple()}
            return {"type": "Point", "coordinates": tuple(self.points[0])}

        if self.shapeType in (MULTIPOINT, MULTIPOINTM, MULTIPOINTZ):
            if len(self.points) == 0:
                return {"type": "MultiPoint", "coordinates": []}
            return {
                "type": "MultiPoint",
                "coordinates": [tuple(p) for p in self.points],
            }

        if self.shapeType in (POLYLINE, POLYLINEM, POLYLINEZ):
            if len(self.parts) == 0:
                return {"type": "LineString", "coordinates": []}
            if len(self.parts) == 1:
                return {
                    "type": "LineString",
                    "coordinates": [tuple(p) for p in self.points],
                }
            return {"type": "MultiLineString", "coordinates": self._split_parts()}

        if self.shapeType in (POLYGON, POLYGONM, POLYGONZ):
            if len(self.parts) == 0:
                return {"type": "Polygon", "coordinates": []}

            # organize rings into list of polygons, where each polygon is defined as list of rings.
            # the first ring is the exterior and any remaining rings are holes (same as GeoJSON).
            polys = organize_polygon_rings(self._split_parts(), self._errors)

            # if VERBOSE is True, issue detailed warning about any shape errors
            # encountered during the Shapefile to GeoJSON conversion
            if VERBOSE and self._errors:
                header = "Possible issue encountered when converting Shape #{} to GeoJSON: ".format(
                    self.oid
                )
                if self._errors.get("polygon_orphaned_holes", None):
                    logger.warning(
                        header
                        + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, "
                        "but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were "
                        "orphaned, i.e. not contained by any exterior rings. The rings were still included but were "
                        "encoded as GeoJSON exterior rings instead of holes."
                    )
                if self._errors.get("polygon_only_holes", None):
                    logger.warning(
                        header
                        + "Shapefile format requires that polygons contain at least one exterior ring, "
                        "but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were "
                        "still included but were encoded as GeoJSON exterior rings instead of holes."
                    )

            # return as geojson
            if len(polys) == 1:
                return {"type": "Polygon", "coordinates": polys[0]}
            return {"type": "MultiPolygon", "coordinates": polys}

        raise Exception(
            'Shape type "%s" cannot be represented as GeoJSON.'
            % SHAPETYPE_LOOKUP[self.shapeType]
        )

    @staticmethod
    def _shapefile_wound(rings):
        """Return the rings of one GeoJSON polygon with shapefile winding order."""
        wound = []
        for i, ring in enumerate(rings):
            # although the latest GeoJSON spec states that exterior rings should have
            # counter-clockwise orientation, we explicitly check orientation since older
            # GeoJSONs might not enforce this.
            # shapefile wants the exterior (first ring) clockwise and holes counter-clockwise
            should_be_cw = i == 0
            if bool(is_cw(ring)) != should_be_cw:
                ring = rewind(ring)
            wound.append(ring)
        return wound

    @staticmethod
    def _flatten_parts(parts):
        """Concatenate parts into one point list plus the start index of each part."""
        points = []
        starts = []
        for part in parts:
            starts.append(len(points))
            points.extend(part)
        return points, starts

    @staticmethod
    def _from_geojson(geoj):
        """Create a Shape from a GeoJSON-like geometry dict.

        :param geoj: Geometry dict with "type" and "coordinates"; a falsy
            value gives a shape of type NULL
        :return: A new Shape
        :raises Exception: if the geometry type is not supported
        """
        # create empty shape
        shape = Shape()
        # set shapeType
        geojType = geoj["type"] if geoj else "Null"
        shapeTypes = {
            "Null": NULL,
            "Point": POINT,
            "LineString": POLYLINE,
            "Polygon": POLYGON,
            "MultiPoint": MULTIPOINT,
            "MultiLineString": POLYLINE,
            "MultiPolygon": POLYGON,
        }
        try:
            shape.shapeType = shapeTypes[geojType]
        except (KeyError, TypeError):
            raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType)

        # set points and parts
        # (types are compared with ==, a bare ("Polygon") is a string and `in` a substring test)
        if geojType == "Point":
            shape.points = [geoj["coordinates"]]
            shape.parts = [0]
        elif geojType in ("MultiPoint", "LineString"):
            shape.points = geoj["coordinates"]
            shape.parts = [0]
        elif geojType == "MultiLineString":
            shape.points, shape.parts = Shape._flatten_parts(geoj["coordinates"])
        elif geojType == "Polygon":
            rings = Shape._shapefile_wound(geoj["coordinates"])
            shape.points, shape.parts = Shape._flatten_parts(rings)
        elif geojType == "MultiPolygon":
            rings = []
            for polygon in geoj["coordinates"]:
                rings.extend(Shape._shapefile_wound(polygon))
            shape.points, shape.parts = Shape._flatten_parts(rings)
        return shape

    @property
    def oid(self):
        """The index position of the shape in the original shapefile"""
        return self.__oid

    @property
    def shapeTypeName(self):
        """The name that SHAPETYPE_LOOKUP gives for this shape's type code"""
        return SHAPETYPE_LOOKUP[self.shapeType]

    def __repr__(self):
        return "Shape #{}: {}".format(self.__oid, self.shapeTypeName)
class _Record(list):
    """
    A class to hold a record. Subclasses list to ensure compatibility with
    former work and to reuse all the optimizations of the builtin list.
    In addition to the list interface, the values of the record
    can also be retrieved using the field's name. For example if the dbf contains
    a field ID at position 0, the ID can be retrieved with the position, the field name
    as a key, or the field name as an attribute.

    >>> # Create a Record with one field, normally the record is created by the Reader class
    >>> r = _Record({'ID': 0}, [0])
    >>> print(r[0])
    0
    >>> print(r['ID'])
    0
    >>> print(r.ID)
    0
    """

    def __init__(self, field_positions, values, oid=None):
        """
        A Record should be created by the Reader class

        :param field_positions: A dict mapping field names to field positions
        :param values: A sequence of values
        :param oid: The object id, an int (optional); stored as -1 when omitted
        """
        self.__field_positions = field_positions
        self.__oid = oid if oid is not None else -1
        list.__init__(self, values)

    def __getattr__(self, item):
        """
        __getattr__ is called if an attribute is used that does
        not exist in the normal sense. For example r=Record(...), r.ID
        calls r.__getattr__('ID'), but r.index(5) calls list.index(r, 5)
        :param item: The field name, used as attribute
        :return: Value of the field
        :raises: AttributeError, if item is not a field of the shapefile
                and IndexError, if the field exists but the field's
                corresponding value in the Record does not exist
        """
        if item == "__setstate__":  # Prevent infinite loop from copy.deepcopy()
            raise AttributeError("_Record does not implement __setstate__")
        if item == "_Record__field_positions":
            # The field mapping itself is missing (instance made without __init__).
            # Looking it up below would re-enter this method without end.
            raise AttributeError("_Record has no field positions yet")
        try:
            index = self.__field_positions[item]
            return list.__getitem__(self, index)
        except KeyError:
            raise AttributeError("{} is not a field name".format(item))
        except IndexError:
            raise IndexError(
                "{} found as a field but not enough values available.".format(item)
            )

    def __setattr__(self, key, value):
        """
        Sets a value of a field attribute
        :param key: The field name
        :param value: the value of that field
        :return: None
        :raises: AttributeError, if key is not a field of the shapefile
        """
        if key.startswith("_"):  # Prevent infinite loop when setting mangled attribute
            return list.__setattr__(self, key, value)
        try:
            index = self.__field_positions[key]
        except KeyError:
            raise AttributeError("{} is not a field name".format(key))
        return list.__setitem__(self, index, value)

    def __getitem__(self, item):
        """
        Extends the normal list item access with
        access using a fieldname

        For example r['ID'], r[0]
        :param item: Either the position of the value or the name of a field
        :return: the value of the field
        :raises: IndexError, if item is neither a valid position nor a field name
        """
        try:
            return list.__getitem__(self, item)
        except TypeError:
            # not an int or slice, so try it as a field name
            try:
                index = self.__field_positions[item]
            except KeyError:
                index = None
            if index is None:
                raise IndexError('"{}" is not a field name and not an int'.format(item))
            return list.__getitem__(self, index)

    def __setitem__(self, key, value):
        """
        Extends the normal list item access with
        access using a fieldname

        For example r['ID']=2, r[0]=2
        :param key: Either the position of the value or the name of a field
        :param value: the new value of the field
        :raises: IndexError, if key is neither a valid position nor a field name
        """
        try:
            return list.__setitem__(self, key, value)
        except TypeError:
            # not an int or slice, so try it as a field name
            index = self.__field_positions.get(key)
            if index is None:
                raise IndexError("{} is not a field name and not an int".format(key))
            return list.__setitem__(self, index, value)

    @property
    def oid(self):
        """The index position of the record in the original shapefile"""
        return self.__oid

    def as_dict(self, date_strings=False):
        """
        Returns this Record as a dictionary using the field names as keys
        :param date_strings: If True, date values are returned as 'YYYYMMDD' strings
        :return: dict
        """
        dct = dict((f, self[i]) for f, i in self.__field_positions.items())
        if date_strings:
            for k, v in dct.items():
                if isinstance(v, date):
                    dct[k] = "{:04d}{:02d}{:02d}".format(v.year, v.month, v.day)
        return dct

    def __repr__(self):
        return "Record #{}: {}".format(self.__oid, list(self))

    def __dir__(self):
        """
        Helps to show the field names in an interactive environment like IPython.
        See: http://ipython.readthedocs.io/en/stable/config/integrating.html

        :return: List of method names and fields
        """
        # default list methods and attributes of this class
        default = list(dir(type(self)))
        # plus field names (random order if Python version < 3.6)
        fnames = list(self.__field_positions.keys())
        return default + fnames

    def __eq__(self, other):
        """Records are equal when both their field positions and values match;
        anything that is not a _Record is compared as a plain list."""
        if isinstance(other, self.__class__):
            if self.__field_positions != other.__field_positions:
                return False
        return list.__eq__(self, other)
class ShapefileException(Exception):
    """Raised for problems that are specific to reading or writing shapefiles."""
def __init__(self, *args, **kwargs):
    """Set up the reader and load the shapefile headers if a source is given.

    :param args: Optionally one positional source: the path (or path-like
        object) of a shapefile on the local filesystem, of a zipfile
        containing one, or a url to either.
    :param kwargs: ``shp``, ``shx`` and ``dbf`` may each be a path or a
        file-like object and are used when no string source was given.
        ``encoding`` (default "utf-8") and ``encodingErrors`` (default
        "strict") are stored on the reader.
    :raises ShapefileException: if a zipfile path is nested, if the zipfile
        holds no shapefile or several without one being named, or if a
        zipfile or url yields neither a shp nor a dbf file.
    """
    self.shp = None
    self.shx = None
    self.dbf = None
    self._files_to_close = []
    self.shapeName = "Not specified"
    self._offsets = []
    self.shpLength = None
    self.numRecords = None
    self.numShapes = None
    self.fields = []
    self.__dbfHdrLength = 0
    self.__fieldLookup = {}
    self.encoding = kwargs.pop("encoding", "utf-8")
    self.encodingErrors = kwargs.pop("encodingErrors", "strict")
    # See if a shapefile name was passed as the first argument
    if len(args) > 0:
        path = pathlike_obj(args[0])
        if is_string(path):
            if ".zip" in path:
                # Shapefile is inside a zipfile
                self._init_from_zip(path)
            elif path.startswith("http"):
                # Shapefile is from a url
                self._init_from_url(path)
            else:
                # Local file path to a shapefile
                self.load(path)
            # A string source is handled completely above, exit early
            return

    # Otherwise, load from separate shp/shx/dbf args (must be path or file-like)
    for ext in ("shp", "shx", "dbf"):
        if ext not in kwargs:
            continue
        source = kwargs[ext]
        if hasattr(source, "read"):
            # Copy if required, i.e. if the stream cannot be rewound
            try:
                source.seek(0)
            except (NameError, io.UnsupportedOperation):
                source = io.BytesIO(source.read())
            setattr(self, ext, source)
        else:
            baseName, _ = os.path.splitext(source)
            # dispatches to load_shp / load_shx / load_dbf
            getattr(self, "load_" + ext)(baseName)

    # Load the files
    if self.shp or self.dbf:
        self.load()


def _download_to_tempfile(self, url, **tempfile_kwargs):
    """Download url into a read+write temporary file, rewound to its start."""
    req = Request(
        url,
        headers={
            "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
        },
    )
    resp = urlopen(req)
    # the tempfile gets deleted on close()
    fileobj = tempfile.NamedTemporaryFile(mode="w+b", delete=True, **tempfile_kwargs)
    fileobj.write(resp.read())
    fileobj.seek(0)
    return fileobj


def _init_from_zip(self, path):
    """Load the shp/shx/dbf members of a local or remote zipfile given by path."""
    if path.count(".zip") > 1:
        # Multiple nested zipfiles
        raise ShapefileException(
            "Reading from multiple nested zipfiles is not supported: %s" % path
        )
    # Split into zipfile and shapefile paths
    if path.endswith(".zip"):
        zpath = path
        shapefile = None
    else:
        zip_end = path.find(".zip") + 4
        zpath = path[:zip_end]
        # skip the separator that follows ".zip"
        shapefile = path[zip_end + 1 :]
    # Create a zip file handle
    if zpath.startswith("http"):
        # Zipfile is from a url
        # Download to a temporary file and treat as normal zipfile
        zipfileobj = self._download_to_tempfile(zpath, suffix=".zip")
    else:
        # Zipfile is from a file
        zipfileobj = open(zpath, mode="rb")
    try:
        # Open the zipfile archive
        with zipfile.ZipFile(zipfileobj, "r") as archive:
            if not shapefile:
                # Only the zipfile path is given
                # Inspect zipfile contents to find the full shapefile path
                shapefiles = [
                    name
                    for name in archive.namelist()
                    if name.endswith((".SHP", ".shp"))
                ]
                # The zipfile must contain exactly one shapefile
                if len(shapefiles) == 0:
                    raise ShapefileException(
                        "Zipfile does not contain any shapefiles"
                    )
                elif len(shapefiles) == 1:
                    shapefile = shapefiles[0]
                else:
                    raise ShapefileException(
                        "Zipfile contains more than one shapefile: %s. "
                        "Please specify the full path to the shapefile "
                        "you would like to open." % shapefiles
                    )
            # Try to extract file-like objects from zipfile
            shapefile = os.path.splitext(shapefile)[0]  # root shapefile name
            for ext in ["SHP", "SHX", "DBF", "shp", "shx", "dbf"]:
                try:
                    member = archive.open(shapefile + "." + ext)
                    # write zipfile member data to a read+write tempfile and use as source, gets deleted on close()
                    fileobj = tempfile.NamedTemporaryFile(mode="w+b", delete=True)
                    fileobj.write(member.read())
                    fileobj.seek(0)
                    setattr(self, ext.lower(), fileobj)
                    self._files_to_close.append(fileobj)
                except Exception:
                    # Best effort: a member that is missing or unreadable is skipped
                    pass
    finally:
        # Close (and for downloads delete) the zipfile, also when an error was raised
        try:
            zipfileobj.close()
        except Exception:
            pass
    # Try to load shapefile
    if not (self.shp or self.dbf):
        raise ShapefileException("No shp or dbf file found in zipfile: %s" % path)
    self.load()


def _init_from_url(self, path):
    """Download the shp/shx/dbf files next to the url path and load them."""
    # Download each file to temporary path and treat as normal shapefile path
    urlinfo = urlparse(path)
    urlpath, _ = os.path.splitext(urlinfo[2])
    for ext in ["shp", "shx", "dbf"]:
        _urlinfo = list(urlinfo)
        _urlinfo[2] = urlpath + "." + ext
        try:
            fileobj = self._download_to_tempfile(urlunparse(_urlinfo))
        except HTTPError:
            # this file does not exist at the url, carry on with the others
            continue
        setattr(self, ext, fileobj)
        self._files_to_close.append(fileobj)
    if not (self.shp or self.dbf):
        raise ShapefileException("No shp or dbf file found at url: %s" % path)
    self.load()
def __len__(self):
    """Number of shapes/records in the shapefile.

    The dbf record count is preferred, then the shape count of the shx
    index. With only a shp file the record headers are walked once to count
    the shapes, and the offsets found are kept in ``self._offsets``.
    Without any file the length is 0.
    """
    if self.dbf:
        # Preferably use dbf record count
        if self.numRecords is None:
            self.__dbfHeader()
        return self.numRecords

    if not self.shp:
        # No file loaded yet, treat as 'empty' shapefile
        return 0

    if self.shx:
        # Otherwise use shape count from the index
        if self.numShapes is None:
            self.__shxHeader()
        return self.numShapes

    if self.numShapes is not None:
        # Already counted before
        return self.numShapes

    # Index file not available, walk all shape headers to get the total count
    stream = self.shp
    resume_at = stream.tell()
    # Determine length of shp file
    stream.seek(0, 2)
    end_of_file = stream.tell()
    stream.seek(100)
    read_header = Struct(">2i").unpack
    record_starts = []
    cursor = stream.tell()
    while cursor < end_of_file:
        record_starts.append(cursor)
        # Only the record header is read, its length is given in 16-bit words
        _, content_words = read_header(stream.read(8))
        cursor += 8 + (2 * content_words)
        stream.seek(cursor)
    # Set numShapes and offset indices
    self.numShapes = len(record_starts)
    self._offsets = record_starts
    # Return to previous file position
    stream.seek(resume_at)
    return self.numShapes
def close(self):
    """Close the files that the reader opened itself and forget them.

    Only the objects listed in ``self._files_to_close`` are closed, so files
    handed in by the user stay open. An IOError raised while closing is
    ignored.
    """
    closable = (f for f in self._files_to_close if hasattr(f, "close"))
    for handle in closable:
        try:
            handle.close()
        except IOError:
            continue
    self._files_to_close = []
def __restrictIndex(self, i):
    """Provides list-like handling of a record index with a clearer
    error message if the index is out of bounds.

    :param i: Record index, negative values count from the end
    :return: The equivalent non-negative index. The index is returned
        unchanged when ``self.numRecords`` is None or 0.
    :raises IndexError: if i lies outside -numRecords .. numRecords - 1
    """
    if self.numRecords:
        count = self.numRecords
        # like a list, -count is still valid and refers to the first record
        if not -count <= i < count:
            raise IndexError(
                "Shape or Record index: %s out of range.  Max index: %s"
                % (i, count - 1)
            )
        if i < 0:
            i += count
    return i
def __shxHeader(self):
    """Reads the header information from a .shx file.

    Sets ``self.numShapes`` from the file length stored in the header.

    :raises ShapefileException: if no .shx file is available
    """
    shx = self.shx
    if not shx:
        raise ShapefileException(
            "Shapefile Reader requires a shapefile or file-like object. (no shx file found)"
        )
    # File length is stored at byte 24 in 16-bit words (* 2 = bytes)
    shx.seek(24)
    fileLength = unpack(">i", shx.read(4))[0] * 2
    # What remains after the 100 header bytes is index records of 8 bytes each
    self.numShapes = (fileLength - 100) // 8
def shapes(self, bbox=None):
    """Read all shapes of the shapefile into a ``Shapes`` list.

    Pass 'bbox' as a list or tuple of xmin,ymin,xmax,ymax to only get the
    shapes within that spatial region.
    """
    return Shapes(self.iterShapes(bbox=bbox))
+ shape = self.__shape(oid=i, bbox=bbox) + if shape: + yield shape + else: + # No shx file, unknown nr of shapes + # Instead iterate until reach end of file + # Collect the offset indices during iteration + i = 0 + offsets = [] + pos = shp.tell() + while pos < shpLength: + offsets.append(pos) + shape = self.__shape(oid=i, bbox=bbox) + pos = shp.tell() + if shape: + yield shape + i += 1 + # Entire shp file consumed + # Update the number of shapes and list of offsets + assert i == len(offsets) + self.numShapes = i + self._offsets = offsets + + def __dbfHeader(self): + """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" + if not self.dbf: + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no dbf file found)" + ) + dbf = self.dbf + # read relevant header parts + dbf.seek(0) + self.numRecords, self.__dbfHdrLength, self.__recordLength = unpack( + " self.numRecords: + raise IndexError( + "abs(stop): %s exceeds number of records: %s." + % (abs(stop), self.numRecords) + ) + elif stop < 0: + stop = range(self.numRecords)[stop] + recSize = self.__recordLength + f.seek(self.__dbfHdrLength + (start * recSize)) + fieldTuples, recLookup, recStruct = self.__recordFields(fields) + for i in xrange(start, stop): + r = self.__record( + oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct + ) + if r: + yield r + + def shapeRecord(self, i=0, fields=None, bbox=None): + """Returns a combination geometry and attribute record for the + supplied record index. + To only read some of the fields, specify the 'fields' arg as a + list of one or more fieldnames. + If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. 
+ """ + i = self.__restrictIndex(i) + shape = self.shape(i, bbox=bbox) + if shape: + record = self.record(i, fields=fields) + return ShapeRecord(shape=shape, record=record) + + def shapeRecords(self, fields=None, bbox=None): + """Returns a list of combination geometry/attribute records for + all records in a shapefile. + To only read some of the fields, specify the 'fields' arg as a + list of one or more fieldnames. + To only read entries within a given spatial region, specify the 'bbox' + arg as a list or tuple of xmin,ymin,xmax,ymax. + """ + return ShapeRecords(self.iterShapeRecords(fields=fields, bbox=bbox)) + + def iterShapeRecords(self, fields=None, bbox=None): + """Returns a generator of combination geometry/attribute records for + all records in a shapefile. + To only read some of the fields, specify the 'fields' arg as a + list of one or more fieldnames. + To only read entries within a given spatial region, specify the 'bbox' + arg as a list or tuple of xmin,ymin,xmax,ymax. + """ + if bbox is None: + # iterate through all shapes and records + for shape, record in izip( + self.iterShapes(), self.iterRecords(fields=fields) + ): + yield ShapeRecord(shape=shape, record=record) + else: + # only iterate where shape.bbox overlaps with the given bbox + # TODO: internal __record method should be faster but would have to + # make sure to seek to correct file location... 
+ + # fieldTuples,recLookup,recStruct = self.__recordFields(fields) + for shape in self.iterShapes(bbox=bbox): + if shape: + # record = self.__record(oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct) + record = self.record(i=shape.oid, fields=fields) + yield ShapeRecord(shape=shape, record=record) + + +class Writer(object): + """Provides write support for ESRI Shapefiles.""" + + def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): + self.target = target + self.autoBalance = autoBalance + self.fields = [] + self.shapeType = shapeType + self.shp = self.shx = self.dbf = None + self._files_to_close = [] + if target: + target = pathlike_obj(target) + if not is_string(target): + raise Exception( + "The target filepath {} must be of type str/unicode or path-like, not {}.".format( + repr(target), type(target) + ) + ) + self.shp = self.__getFileObj(os.path.splitext(target)[0] + ".shp") + self.shx = self.__getFileObj(os.path.splitext(target)[0] + ".shx") + self.dbf = self.__getFileObj(os.path.splitext(target)[0] + ".dbf") + elif kwargs.get("shp") or kwargs.get("shx") or kwargs.get("dbf"): + shp, shx, dbf = kwargs.get("shp"), kwargs.get("shx"), kwargs.get("dbf") + if shp: + self.shp = self.__getFileObj(shp) + if shx: + self.shx = self.__getFileObj(shx) + if dbf: + self.dbf = self.__getFileObj(dbf) + else: + raise Exception( + "Either the target filepath, or any of shp, shx, or dbf must be set to create a shapefile." + ) + # Initiate with empty headers, to be finalized upon closing + if self.shp: + self.shp.write(b"9" * 100) + if self.shx: + self.shx.write(b"9" * 100) + # Geometry record offsets and lengths for writing shx file. + self.recNum = 0 + self.shpNum = 0 + self._bbox = None + self._zbox = None + self._mbox = None + # Use deletion flags in dbf? Default is false (0). Note: Currently has no effect, records should NOT contain deletion flags. 
+ self.deletionFlag = 0 + # Encoding + self.encoding = kwargs.pop("encoding", "utf-8") + self.encodingErrors = kwargs.pop("encodingErrors", "strict") + + def __len__(self): + """Returns the current number of features written to the shapefile. + If shapes and records are unbalanced, the length is considered the highest + of the two.""" + return max(self.recNum, self.shpNum) + + def __enter__(self): + """ + Enter phase of context manager. + """ + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ + Exit phase of context manager, finish writing and close the files. + """ + self.close() + + def __del__(self): + self.close() + + def close(self): + """ + Write final shp, shx, and dbf headers, close opened files. + """ + # Check if any of the files have already been closed + shp_open = self.shp and not (hasattr(self.shp, "closed") and self.shp.closed) + shx_open = self.shx and not (hasattr(self.shx, "closed") and self.shx.closed) + dbf_open = self.dbf and not (hasattr(self.dbf, "closed") and self.dbf.closed) + + # Balance if already not balanced + if self.shp and shp_open and self.dbf and dbf_open: + if self.autoBalance: + self.balance() + if self.recNum != self.shpNum: + raise ShapefileException( + "When saving both the dbf and shp file, " + "the number of records (%s) must correspond " + "with the number of shapes (%s)" % (self.recNum, self.shpNum) + ) + # Fill in the blank headers + if self.shp and shp_open: + self.__shapefileHeader(self.shp, headerType="shp") + if self.shx and shx_open: + self.__shapefileHeader(self.shx, headerType="shx") + + # Update the dbf header with final length etc + if self.dbf and dbf_open: + self.__dbfHeader() + + # Flush files + for attribute in (self.shp, self.shx, self.dbf): + if hasattr(attribute, "flush") and not ( + hasattr(attribute, "closed") and attribute.closed + ): + try: + attribute.flush() + except IOError: + pass + + # Close any files that the writer opened (but not those given by user) + for attribute in 
self._files_to_close: + if hasattr(attribute, "close"): + try: + attribute.close() + except IOError: + pass + self._files_to_close = [] + + def __getFileObj(self, f): + """Safety handler to verify file-like objects""" + if not f: + raise ShapefileException("No file-like object available.") + elif hasattr(f, "write"): + return f + else: + pth = os.path.split(f)[0] + if pth and not os.path.exists(pth): + os.makedirs(pth) + fp = open(f, "wb+") + self._files_to_close.append(fp) + return fp + + def __shpFileLength(self): + """Calculates the file length of the shp file.""" + # Remember starting position + start = self.shp.tell() + # Calculate size of all shapes + self.shp.seek(0, 2) + size = self.shp.tell() + # Calculate size as 16-bit words + size //= 2 + # Return to start + self.shp.seek(start) + return size + + def __bbox(self, s): + x = [] + y = [] + if len(s.points) > 0: + px, py = list(zip(*s.points))[:2] + x.extend(px) + y.extend(py) + else: + # this should not happen. + # any shape that is not null should have at least one point, and only those should be sent here. + # could also mean that earlier code failed to add points to a non-null shape. + raise Exception( + "Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." 
+ % s.shapeType + ) + bbox = [min(x), min(y), max(x), max(y)] + # update global + if self._bbox: + # compare with existing + self._bbox = [ + min(bbox[0], self._bbox[0]), + min(bbox[1], self._bbox[1]), + max(bbox[2], self._bbox[2]), + max(bbox[3], self._bbox[3]), + ] + else: + # first time bbox is being set + self._bbox = bbox + return bbox + + def __zbox(self, s): + z = [] + for p in s.points: + try: + z.append(p[2]) + except IndexError: + # point did not have z value + # setting it to 0 is probably ok, since it means all are on the same elevation + z.append(0) + zbox = [min(z), max(z)] + # update global + if self._zbox: + # compare with existing + self._zbox = [min(zbox[0], self._zbox[0]), max(zbox[1], self._zbox[1])] + else: + # first time zbox is being set + self._zbox = zbox + return zbox + + def __mbox(self, s): + mpos = 3 if s.shapeType in (11, 13, 15, 18, 31) else 2 + m = [] + for p in s.points: + try: + if p[mpos] is not None: + # mbox should only be calculated on valid m values + m.append(p[mpos]) + except IndexError: + # point did not have m value so is missing + # mbox should only be calculated on valid m values + pass + if not m: + # only if none of the shapes had m values, should mbox be set to missing m values + m.append(NODATA) + mbox = [min(m), max(m)] + # update global + if self._mbox: + # compare with existing + self._mbox = [min(mbox[0], self._mbox[0]), max(mbox[1], self._mbox[1])] + else: + # first time mbox is being set + self._mbox = mbox + return mbox + + @property + def shapeTypeName(self): + return SHAPETYPE_LOOKUP[self.shapeType] + + def bbox(self): + """Returns the current bounding box for the shapefile which is + the lower-left and upper-right corners. 
It does not contain the + elevation or measure extremes.""" + return self._bbox + + def zbox(self): + """Returns the current z extremes for the shapefile.""" + return self._zbox + + def mbox(self): + """Returns the current m extremes for the shapefile.""" + return self._mbox + + def __shapefileHeader(self, fileObj, headerType="shp"): + """Writes the specified header type to the specified file-like object. + Several of the shapefile formats are so similar that a single generic + method to read or write them is warranted.""" + f = self.__getFileObj(fileObj) + f.seek(0) + # File code, Unused bytes + f.write(pack(">6i", 9994, 0, 0, 0, 0, 0)) + # File length (Bytes / 2 = 16-bit words) + if headerType == "shp": + f.write(pack(">i", self.__shpFileLength())) + elif headerType == "shx": + f.write(pack(">i", ((100 + (self.shpNum * 8)) // 2))) + # Version, Shape type + if self.shapeType is None: + self.shapeType = NULL + f.write(pack("<2i", 1000, self.shapeType)) + # The shapefile's bounding box (lower left, upper right) + if self.shapeType != 0: + try: + bbox = self.bbox() + if bbox is None: + # The bbox is initialized with None, so this would mean the shapefile contains no valid geometries. + # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. + # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. + # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. + bbox = [0, 0, 0, 0] + f.write(pack("<4d", *bbox)) + except error: + raise ShapefileException( + "Failed to write shapefile bounding box. Floats required." 
+ ) + else: + f.write(pack("<4d", 0, 0, 0, 0)) + # Elevation + if self.shapeType in (11, 13, 15, 18): + # Z values are present in Z type + zbox = self.zbox() + if zbox is None: + # means we have empty shapefile/only null geoms (see commentary on bbox above) + zbox = [0, 0] + else: + # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s + zbox = [0, 0] + # Measure + if self.shapeType in (11, 13, 15, 18, 21, 23, 25, 28, 31): + # M values are present in M or Z type + mbox = self.mbox() + if mbox is None: + # means we have empty shapefile/only null geoms (see commentary on bbox above) + mbox = [0, 0] + else: + # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s + mbox = [0, 0] + # Try writing + try: + f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) + except error: + raise ShapefileException( + "Failed to write shapefile elevation and measure values. Floats required." + ) + + def __dbfHeader(self): + """Writes the dbf header and field descriptors.""" + f = self.__getFileObj(self.dbf) + f.seek(0) + version = 3 + year, month, day = time.localtime()[:3] + year -= 1900 + # Get all fields, ignoring DeletionFlag if specified + fields = [field for field in self.fields if field[0] != "DeletionFlag"] + # Ensure has at least one field + if not fields: + raise ShapefileException( + "Shapefile dbf file must contain at least one field." + ) + numRecs = self.recNum + numFields = len(fields) + headerLength = numFields * 32 + 33 + if headerLength >= 65535: + raise ShapefileException( + "Shapefile dbf header length exceeds maximum length." + ) + recordLength = sum([int(field[2]) for field in fields]) + 1 + header = pack( + "2i", self.shpNum, 0)) + start = f.tell() + # Shape Type + if self.shapeType is None and s.shapeType != NULL: + self.shapeType = s.shapeType + if s.shapeType != NULL and s.shapeType != self.shapeType: + raise Exception( + "The shape's type (%s) must match the type of the shapefile (%s)." 
+ % (s.shapeType, self.shapeType) + ) + f.write(pack(" 2 else 0)) for p in s.points] + except error: + raise ShapefileException( + "Failed to write elevation values for record %s. Expected floats." + % self.shpNum + ) + # Write m extremes and values + # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA + # Note: missing m values are autoset to NODATA. + if s.shapeType in (13, 15, 18, 23, 25, 28, 31): + try: + f.write(pack("<2d", *self.__mbox(s))) + except error: + raise ShapefileException( + "Failed to write measure extremes for record %s. Expected floats" + % self.shpNum + ) + try: + if hasattr(s, "m"): + # if m values are stored in attribute + # fmt: off + f.write( + pack( + "<%sd" % len(s.m), + *[m if m is not None else NODATA for m in s.m] + ) + ) + # fmt: on + else: + # if m values are stored as 3rd/4th dimension + # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) + mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 + [ + f.write( + pack( + " mpos and p[mpos] is not None + else NODATA, + ) + ) + for p in s.points + ] + except error: + raise ShapefileException( + "Failed to write measure values for record %s. Expected floats" + % self.shpNum + ) + # Write a single point + if s.shapeType in (1, 11, 21): + try: + f.write(pack("<2d", s.points[0][0], s.points[0][1])) + except error: + raise ShapefileException( + "Failed to write point for record %s. Expected floats." + % self.shpNum + ) + # Write a single Z value + # Note: missing z values are autoset to 0, but not sure if this is ideal. 
def record(self, *recordList, **recordDict):
    """Creates a dbf attribute record.

    Values may be given either positionally (one per field, in field order)
    or as keyword arguments keyed by field name. Fields must already have
    been registered before records are added. Positional values that are
    missing are padded with empty strings; keyword names that do not match a
    registered field are ignored, and a value of None is stored as an empty
    string. With no arguments at all, a completely blank record is written.
    """
    # Catch up on missing geometries first when auto-balancing is enabled.
    if self.autoBalance and self.recNum > self.shpNum:
        self.balance()

    # The deletion flag is never a user column, so leave it out entirely.
    userFields = [fld for fld in self.fields if fld[0] != "DeletionFlag"]
    fieldCount = len(userFields)

    if recordList:
        values = list(recordList)
        shortfall = fieldCount - len(values)
        if shortfall > 0:
            # pad the tail so every remaining field gets an empty value
            values.extend([""] * shortfall)
    elif recordDict:
        values = []
        for fld in userFields:
            supplied = recordDict.get(fld[0])
            # absent keys and explicit None both become an empty value
            values.append("" if supplied is None else supplied)
    else:
        # Blank fields for empty record
        values = [""] * fieldCount
    self.__dbfRecord(values)
== 0: + # first records, so all fields should be set + # allowing us to write the dbf header + # cannot change the fields after this point + self.__dbfHeader() + # first byte of the record is deletion flag, always disabled + f.write(b" ") + # begin + self.recNum += 1 + fields = ( + field for field in self.fields if field[0] != "DeletionFlag" + ) # ignore deletionflag field in case it was specified + for (fieldName, fieldType, size, deci), value in zip(fields, record): + # write + fieldType = fieldType.upper() + size = int(size) + if fieldType in ("N", "F"): + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + value = b"*" * size # QGIS NULL + elif not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. + value = int(float(value)) + value = format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + else: + value = float(value) + value = format(value, ".%sf" % deci)[:size].rjust( + size + ) # caps the size if exceeds the field size + elif fieldType == "D": + # date: 8 bytes - date stored as a string in the format YYYYMMDD. + if isinstance(value, date): + value = "{:04d}{:02d}{:02d}".format( + value.year, value.month, value.day + ) + elif isinstance(value, list) and len(value) == 3: + value = "{:04d}{:02d}{:02d}".format(*value) + elif value in MISSING: + value = b"0" * 8 # QGIS NULL for date type + elif is_string(value) and len(value) == 8: + pass # value is already a date string + else: + raise ShapefileException( + "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." + ) + elif fieldType == "L": + # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
def balance(self):
    """Brings the shape count and the record count back in step.

    Whichever side is behind is padded: null geometries are added while
    there are fewer shapes than records, and blank attribute records are
    added while there are fewer records than shapes.
    """
    # Too few geometries: pad with null shapes.
    while self.shpNum < self.recNum:
        self.null()
    # Too few attribute rows: pad with empty records.
    while self.shpNum > self.recNum:
        self.record()
def line(self, lines):
    """Creates a POLYLINE shape.

    ``lines`` is a collection of lines, each of which is a list of xy
    values. The work is delegated to the generic multi-part shape builder.
    """
    self._shapeparts(parts=lines, shapeType=POLYLINE)
def polym(self, polys):
    """Creates a POLYGONM shape.

    ``polys`` is a collection of polygons, each of which is a list of xym
    values. Ordinary polygons must have their coordinates running in a
    clockwise direction, while polygons acting as holes must run
    counterclockwise. A measure (m) value that is left out defaults to
    None (NoData).
    """
    self._shapeparts(parts=polys, shapeType=POLYGONM)
def _shapeparts(self, parts, shapeType):
    """Internal method for adding a shape that has multiple collections of
    points (parts): lines, polygons, and multipoint shapes.

    Args:
        parts: iterable of parts, each an iterable of points.
        shapeType: numeric shape type passed to the new Shape.

    For the ring-based shape types (5, 15, 25, 31) any part whose first and
    last points differ is closed by repeating its first point. The closing
    point is added to a private copy of the part, so the caller's sequences
    are never modified and immutable parts (e.g. tuples) are accepted.
    """
    polyShape = Shape(shapeType)
    polyShape.parts = []
    polyShape.points = []
    # Only ring-based types need their parts closed.
    closeRings = shapeType in (5, 15, 25, 31)
    for part in parts:
        # Copy first: previously the closing point was appended directly to
        # the caller's list, silently altering user data and failing with
        # AttributeError when a part was a tuple.
        part = list(part)
        if closeRings and part[0] != part[-1]:
            part.append(part[0])
        # set part index position
        polyShape.parts.append(len(polyShape.points))
        # add points
        for point in part:
            # Ensure point is list
            if not isinstance(point, list):
                point = list(point)
            polyShape.points.append(point)
    # write the shape
    self.shape(polyShape)
def test(**kwargs):
    """Runs the doctests embedded in README.md.

    Keyword Args:
        verbose: truthy to have doctest report every example; defaults to 0.

    Returns:
        The number of failed doctest examples, as reported by the runner.

    README.md is opened relative to the current working directory.
    """
    import doctest
    import re

    verbosity = kwargs.get("verbose", 0)
    if verbosity == 0:
        print("Running doctests...")

    # ignore py2-3 unicode differences
    class Py23DocChecker(doctest.OutputChecker):
        def check_output(self, want, got, optionflags):
            if sys.version_info[0] == 2:
                # strip the u prefix from unicode reprs on Python 2
                got = re.sub("u'(.*?)'", "'\\1'", got)
                got = re.sub('u"(.*?)"', '"\\1"', got)
            return doctest.OutputChecker.check_output(self, want, got, optionflags)

    # run tests
    # Whitespace normalisation is requested through the runner's option
    # flags. Assigning to doctest.NORMALIZE_WHITESPACE (as was done before)
    # only overwrote the stdlib flag constant and never enabled the option.
    runner = doctest.DocTestRunner(
        checker=Py23DocChecker(),
        verbose=verbosity,
        optionflags=doctest.NORMALIZE_WHITESPACE,
    )
    with open("README.md", "rb") as fobj:
        # named readme_test so the enclosing function name is not shadowed
        readme_test = doctest.DocTestParser().get_doctest(
            string=fobj.read().decode("utf8").replace("\r\n", "\n"),
            globs={},
            name="README",
            filename="README.md",
            lineno=0,
        )
    failure_count, _ = runner.run(readme_test)

    # print results
    if verbosity:
        runner.summarize(True)
    else:
        if failure_count == 0:
            print("All test passed successfully")
        elif failure_count > 0:
            runner.summarize(verbosity)

    return failure_count
+ """ + failure_count = test() + sys.exit(failure_count) diff --git a/shapefile.pyc b/shapefile.pyc deleted file mode 100644 index ae245f3..0000000 Binary files a/shapefile.pyc and /dev/null differ diff --git a/shapefiles/blockgroups.zip b/shapefiles/blockgroups.zip new file mode 100644 index 0000000..7293c09 Binary files /dev/null and b/shapefiles/blockgroups.zip differ diff --git a/shapefiles/blockgroups_multishapefile.zip b/shapefiles/blockgroups_multishapefile.zip new file mode 100644 index 0000000..085b2e4 Binary files /dev/null and b/shapefiles/blockgroups_multishapefile.zip differ diff --git a/shapefiles/empty_zipfile.zip b/shapefiles/empty_zipfile.zip new file mode 100644 index 0000000..f3f170d Binary files /dev/null and b/shapefiles/empty_zipfile.zip differ diff --git a/shapefiles/test/balancing.dbf b/shapefiles/test/balancing.dbf index 2ba4224..c77d63b 100644 Binary files a/shapefiles/test/balancing.dbf and b/shapefiles/test/balancing.dbf differ diff --git a/shapefiles/test/contextwriter.dbf b/shapefiles/test/contextwriter.dbf index cc5071c..e030c2a 100644 Binary files a/shapefiles/test/contextwriter.dbf and b/shapefiles/test/contextwriter.dbf differ diff --git a/shapefiles/test/corrupt_too_long.dbf b/shapefiles/test/corrupt_too_long.dbf new file mode 100644 index 0000000..57230c5 Binary files /dev/null and b/shapefiles/test/corrupt_too_long.dbf differ diff --git a/shapefiles/test/corrupt_too_long.shp b/shapefiles/test/corrupt_too_long.shp new file mode 100644 index 0000000..e9ef9cf Binary files /dev/null and b/shapefiles/test/corrupt_too_long.shp differ diff --git a/shapefiles/test/corrupt_too_long.shx b/shapefiles/test/corrupt_too_long.shx new file mode 100644 index 0000000..79f2870 Binary files /dev/null and b/shapefiles/test/corrupt_too_long.shx differ diff --git a/shapefiles/test/dtype.dbf b/shapefiles/test/dtype.dbf index 7279004..1ddda01 100644 Binary files a/shapefiles/test/dtype.dbf and b/shapefiles/test/dtype.dbf differ diff --git 
a/shapefiles/test/edit.dbf b/shapefiles/test/edit.dbf new file mode 100644 index 0000000..8854e03 Binary files /dev/null and b/shapefiles/test/edit.dbf differ diff --git a/shapefiles/test/edit.shp b/shapefiles/test/edit.shp new file mode 100644 index 0000000..f1734a3 Binary files /dev/null and b/shapefiles/test/edit.shp differ diff --git a/shapefiles/test/edit.shx b/shapefiles/test/edit.shx new file mode 100644 index 0000000..a2f62ac Binary files /dev/null and b/shapefiles/test/edit.shx differ diff --git a/shapefiles/test/line.dbf b/shapefiles/test/line.dbf index d808be3..24f529e 100644 Binary files a/shapefiles/test/line.dbf and b/shapefiles/test/line.dbf differ diff --git a/shapefiles/test/linem.dbf b/shapefiles/test/linem.dbf index 215f594..28c6681 100644 Binary files a/shapefiles/test/linem.dbf and b/shapefiles/test/linem.dbf differ diff --git a/shapefiles/test/linez.dbf b/shapefiles/test/linez.dbf index bbd5538..6709eb0 100644 Binary files a/shapefiles/test/linez.dbf and b/shapefiles/test/linez.dbf differ diff --git a/shapefiles/test/merge.dbf b/shapefiles/test/merge.dbf new file mode 100644 index 0000000..4164f11 Binary files /dev/null and b/shapefiles/test/merge.dbf differ diff --git a/shapefiles/test/merge.shp b/shapefiles/test/merge.shp new file mode 100644 index 0000000..b380b99 Binary files /dev/null and b/shapefiles/test/merge.shp differ diff --git a/shapefiles/test/merge.shx b/shapefiles/test/merge.shx new file mode 100644 index 0000000..53ba59e Binary files /dev/null and b/shapefiles/test/merge.shx differ diff --git a/shapefiles/test/multipatch.dbf b/shapefiles/test/multipatch.dbf index af40493..bc9fe23 100644 Binary files a/shapefiles/test/multipatch.dbf and b/shapefiles/test/multipatch.dbf differ diff --git a/shapefiles/test/multipoint.dbf b/shapefiles/test/multipoint.dbf index 0b2570e..4d7d4f1 100644 Binary files a/shapefiles/test/multipoint.dbf and b/shapefiles/test/multipoint.dbf differ diff --git a/shapefiles/test/onlydbf.dbf 
b/shapefiles/test/onlydbf.dbf index cc5071c..e030c2a 100644 Binary files a/shapefiles/test/onlydbf.dbf and b/shapefiles/test/onlydbf.dbf differ diff --git a/shapefiles/test/point.dbf b/shapefiles/test/point.dbf index 994eddc..e29d085 100644 Binary files a/shapefiles/test/point.dbf and b/shapefiles/test/point.dbf differ diff --git a/shapefiles/test/polygon.dbf b/shapefiles/test/polygon.dbf index 9729fad..b116dc4 100644 Binary files a/shapefiles/test/polygon.dbf and b/shapefiles/test/polygon.dbf differ diff --git a/shapefiles/test/polygon.shp b/shapefiles/test/polygon.shp index afd7c36..c7654eb 100644 Binary files a/shapefiles/test/polygon.shp and b/shapefiles/test/polygon.shp differ diff --git a/shapefiles/test/polygon.shx b/shapefiles/test/polygon.shx index 37d1ddb..df6e015 100644 Binary files a/shapefiles/test/polygon.shx and b/shapefiles/test/polygon.shx differ diff --git a/shapefiles/test/shapetype.dbf b/shapefiles/test/shapetype.dbf index cc5071c..e030c2a 100644 Binary files a/shapefiles/test/shapetype.dbf and b/shapefiles/test/shapetype.dbf differ diff --git a/shapefiles/test/testfile.dbf b/shapefiles/test/testfile.dbf index cc5071c..e030c2a 100644 Binary files a/shapefiles/test/testfile.dbf and b/shapefiles/test/testfile.dbf differ diff --git a/test_shapefile.py b/test_shapefile.py new file mode 100644 index 0000000..7984e91 --- /dev/null +++ b/test_shapefile.py @@ -0,0 +1,1842 @@ +""" +This module tests the functionality of shapefile.py. 
+""" + +import datetime +import json +import os.path + +try: + from pathlib import Path +except ImportError: + # pathlib2 is a dependency of pytest >= 3.7 + from pathlib2 import Path + +# third party imports +import pytest + +# our imports +import shapefile + +# define various test shape tuples of (type, points, parts indexes, and expected geo interface output) +geo_interface_tests = [ + ( + shapefile.POINT, # point + [(1, 1)], + [], + {"type": "Point", "coordinates": (1, 1)}, + ), + ( + shapefile.MULTIPOINT, # multipoint + [(1, 1), (2, 1), (2, 2)], + [], + {"type": "MultiPoint", "coordinates": [(1, 1), (2, 1), (2, 2)]}, + ), + ( + shapefile.POLYLINE, # single linestring + [(1, 1), (2, 1)], + [0], + {"type": "LineString", "coordinates": [(1, 1), (2, 1)]}, + ), + ( + shapefile.POLYLINE, # multi linestring + [ + (1, 1), + (2, 1), # line 1 + (10, 10), + (20, 10), + ], # line 2 + [0, 2], + { + "type": "MultiLineString", + "coordinates": [ + [(1, 1), (2, 1)], # line 1 + [(10, 10), (20, 10)], # line 2 + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, no holes + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + ], + [0], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, holes (ordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 2 + ], + [0, 5, 5 + 5], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, holes (unordered) + [ + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1 + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 2 + ], + [0, 5, 
5 + 5], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, no holes + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior + ], + [0, 5], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, holes (unordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior 2 + (12, 12), + (14, 12), + (14, 14), + (12, 14), + (12, 12), # hole 2.1 + (15, 15), + (17, 15), + (17, 17), + (15, 17), + (15, 15), # hole 2.2 + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1.1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 1.2 + ], + [0, 5, 10, 15, 20, 25], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], # exterior + [(12, 12), (14, 12), (14, 14), (12, 14), (12, 12)], # hole 1 + [(15, 15), (17, 15), (17, 17), (15, 17), (15, 15)], # hole 2 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (3, 3), + (3, 7), + (7, 7), + (7, 3), + (3, 3), # exterior 2 + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), # exterior 3 + (4, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), # hole 2.1 + (2, 2), + (8, 2), + (8, 8), + (2, 8), + (2, 2), # hole 1.1 + ], + 
[0, 5, 10, 15, 20], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior 1 + [(2, 2), (8, 2), (8, 8), (2, 8), (2, 2)], # hole 1.1 + ], + [ # poly 2 + [(3, 3), (3, 7), (7, 7), (7, 3), (3, 3)], # exterior 2 + [(4, 4), (6, 4), (6, 6), (4, 6), (4, 4)], # hole 2.1 + ], + [ # poly 3 + [ + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), + ], # exterior 3 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered and tricky holes designed to throw off ring_sample() test) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (3, 3), + (3, 7), + (7, 7), + (7, 3), + (3, 3), # exterior 2 + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), # exterior 3 + (4, 4), + (4, 4), + (6, 4), + (6, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), # hole 2.1 (hole has duplicate coords) + (2, 2), + (3, 3), + (4, 2), + (8, 2), + (8, 8), + (4, 8), + (2, 8), + (2, 4), + ( + 2, + 2, + ), # hole 1.1 (hole coords form straight line and starts in concave orientation) + ], + [0, 5, 10, 15, 20 + 3], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior 1 + [ + (2, 2), + (3, 3), + (4, 2), + (8, 2), + (8, 8), + (4, 8), + (2, 8), + (2, 4), + (2, 2), + ], # hole 1.1 + ], + [ # poly 2 + [(3, 3), (3, 7), (7, 7), (7, 3), (3, 3)], # exterior 2 + [ + (4, 4), + (4, 4), + (6, 4), + (6, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), + ], # hole 2.1 + ], + [ # poly 3 + [ + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), + ], # exterior 3 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, holes incl orphaned holes (unordered), should raise warning + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior 2 + (12, 12), + (14, 12), + (14, 14), + (12, 14), + (12, 12), # hole 2.1 + (15, 15), + (17, 
15), + (17, 17), + (15, 17), + (15, 15), # hole 2.2 + (95, 95), + (97, 95), + (97, 97), + (95, 97), + (95, 95), # hole x.1 (orphaned hole, should be interpreted as exterior) + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1.1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 1.2 + ], + [0, 5, 10, 15, 20, 25, 30], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], # exterior + [(12, 12), (14, 12), (14, 14), (12, 14), (12, 12)], # hole 1 + [(15, 15), (17, 15), (17, 17), (15, 17), (15, 15)], # hole 2 + ], + [ # poly 3 (orphaned hole) + [(95, 95), (97, 95), (97, 97), (95, 97), (95, 95)], # exterior + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, exteriors with wrong orientation (be nice and interpret as such), should raise warning + [ + (1, 1), + (9, 1), + (9, 9), + (1, 9), + (1, 1), # exterior with hole-orientation + (11, 11), + (19, 11), + (19, 19), + (11, 19), + (11, 11), # exterior with hole-orientation + ], + [0, 5], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (9, 1), (9, 9), (1, 9), (1, 1)], + ], + [ # poly 2 + [(11, 11), (19, 11), (19, 19), (11, 19), (11, 11)], + ], + ], + }, + ), +] + + +def test_empty_shape_geo_interface(): + """ + Assert that calling __geo_interface__ + on a Shape with no points or parts + raises an Exception. + """ + shape = shapefile.Shape() + with pytest.raises(Exception): + getattr(shape, "__geo_interface__") + + +@pytest.mark.parametrize("typ,points,parts,expected", geo_interface_tests) +def test_expected_shape_geo_interface(typ, points, parts, expected): + """ + Assert that calling __geo_interface__ + on arbitrary input Shape works as expected. 
+ """ + shape = shapefile.Shape(typ, points, parts) + geoj = shape.__geo_interface__ + assert geoj == expected + + +def test_reader_geo_interface(): + with shapefile.Reader("shapefiles/blockgroups") as r: + geoj = r.__geo_interface__ + assert geoj["type"] == "FeatureCollection" + assert "bbox" in geoj + assert json.dumps(geoj) + + +def test_shapes_geo_interface(): + with shapefile.Reader("shapefiles/blockgroups") as r: + geoj = r.shapes().__geo_interface__ + assert geoj["type"] == "GeometryCollection" + assert json.dumps(geoj) + + +def test_shaperecords_geo_interface(): + with shapefile.Reader("shapefiles/blockgroups") as r: + geoj = r.shapeRecords().__geo_interface__ + assert geoj["type"] == "FeatureCollection" + assert json.dumps(geoj) + + +def test_shaperecord_geo_interface(): + with shapefile.Reader("shapefiles/blockgroups") as r: + for shaperec in r: + assert json.dumps(shaperec.__geo_interface__) + + +@pytest.mark.network +def test_reader_url(): + """ + Assert that Reader can open shapefiles from a url. 
+ """ + # test with extension + url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries.shp?raw=true" + with shapefile.Reader(url) as sf: + for __recShape in sf.iterShapeRecords(): + pass + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True + + # test without extension + url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries?raw=true" + with shapefile.Reader(url) as sf: + for __recShape in sf.iterShapeRecords(): + pass + assert len(sf) > 0 + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True + + # test no files found + url = "https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/README.md" + with pytest.raises(shapefile.ShapefileException): + with shapefile.Reader(url) as sf: + pass + + # test reading zipfile from url + url = "https://github.com/JamesParrott/PyShp_test_shapefile/raw/main/gis_osm_natural_a_free_1.zip" + with shapefile.Reader(url) as sf: + for __recShape in sf.iterShapeRecords(): + pass + assert len(sf) > 0 + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True + + +def test_reader_zip(): + """ + Assert that Reader can open shapefiles inside a zipfile. 
+ """ + # test reading zipfile only + with shapefile.Reader("shapefiles/blockgroups.zip") as sf: + for __recShape in sf.iterShapeRecords(): + pass + assert len(sf) > 0 + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True + + # test require specific path when reading multi-shapefile zipfile + with pytest.raises(shapefile.ShapefileException): + with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip") as sf: + pass + + # test specifying the path when reading multi-shapefile zipfile (with extension) + with shapefile.Reader( + "shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp" + ) as sf: + for __recShape in sf.iterShapeRecords(): + pass + assert len(sf) > 0 + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True + + # test specifying the path when reading multi-shapefile zipfile (without extension) + with shapefile.Reader( + "shapefiles/blockgroups_multishapefile.zip/blockgroups2" + ) as sf: + for __recShape in sf.iterShapeRecords(): + pass + assert len(sf) > 0 + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True + + # test raising error when can't find shapefile inside zipfile + with pytest.raises(shapefile.ShapefileException): + with shapefile.Reader("shapefiles/empty_zipfile.zip") as sf: + pass + + +def test_reader_close_path(): + """ + Assert that manually calling Reader.close() + closes the shp, shx, and dbf files + on exit, if given paths. + """ + # note uses an actual shapefile from + # the projects "shapefiles" directory + sf = shapefile.Reader("shapefiles/blockgroups.shp") + sf.close() + + assert sf.shp.closed is True + assert sf.dbf.closed is True + assert sf.shx.closed is True + + # check that can read again + sf = shapefile.Reader("shapefiles/blockgroups.shp") + sf.close() + + +def test_reader_close_filelike(): + """ + Assert that manually calling Reader.close() + leaves the shp, shx, and dbf files open + on exit, if given filelike objects. 
+ """ + # note uses an actual shapefile from + # the projects "shapefiles" directory + shp = open("shapefiles/blockgroups.shp", mode="rb") + shx = open("shapefiles/blockgroups.shx", mode="rb") + dbf = open("shapefiles/blockgroups.dbf", mode="rb") + sf = shapefile.Reader(shp=shp, shx=shx, dbf=dbf) + sf.close() + + assert sf.shp.closed is False + assert sf.dbf.closed is False + assert sf.shx.closed is False + + # check that can read again + sf = shapefile.Reader(shp=shp, shx=shx, dbf=dbf) + sf.close() + + +def test_reader_context_path(): + """ + Assert that using the context manager + closes the shp, shx, and dbf files + on exit, if given paths. + """ + # note uses an actual shapefile from + # the projects "shapefiles" directory + with shapefile.Reader("shapefiles/blockgroups") as sf: + pass + + assert sf.shp.closed is True + assert sf.dbf.closed is True + assert sf.shx.closed is True + + # check that can read again + with shapefile.Reader("shapefiles/blockgroups") as sf: + pass + + +def test_reader_context_filelike(): + """ + Assert that using the context manager + leaves the shp, shx, and dbf files open + on exit, if given filelike objects. + """ + # note uses an actual shapefile from + # the projects "shapefiles" directory + shp = open("shapefiles/blockgroups.shp", mode="rb") + shx = open("shapefiles/blockgroups.shx", mode="rb") + dbf = open("shapefiles/blockgroups.dbf", mode="rb") + with shapefile.Reader(shp=shp, shx=shx, dbf=dbf) as sf: + pass + + assert sf.shp.closed is False + assert sf.dbf.closed is False + assert sf.shx.closed is False + + # check that can read again + with shapefile.Reader(shp=shp, shx=shx, dbf=dbf) as sf: + pass + + +def test_reader_shapefile_type(): + """ + Assert that the type of the shapefile + is returned correctly. 
+ """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + assert sf.shapeType == 5 # 5 means Polygon + assert sf.shapeType == shapefile.POLYGON + assert sf.shapeTypeName == "POLYGON" + + +def test_reader_shapefile_length(): + """ + Assert that the length the reader gives us + matches up with the number of records + in the file. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + assert len(sf) == len(sf.shapes()) + + +def test_shape_metadata(): + with shapefile.Reader("shapefiles/blockgroups") as sf: + shape = sf.shape(0) + assert shape.shapeType == 5 # Polygon + assert shape.shapeType == shapefile.POLYGON + assert sf.shapeTypeName == "POLYGON" + + +def test_reader_fields(): + """ + Assert that the reader's fields attribute + gives the shapefile's fields as a list. + Assert that each field has a name, + type, field length, and decimal length. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + fields = sf.fields + assert isinstance(fields, list) + + field = fields[0] + assert isinstance(field[0], str) # field name + assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type + assert isinstance(field[2], int) # field length + assert isinstance(field[3], int) # decimal length + + +def test_reader_shapefile_extension_ignored(): + """ + Assert that the filename's extension is + ignored when reading a shapefile. + """ + base = "shapefiles/blockgroups" + ext = ".abc" + filename = base + ext + with shapefile.Reader(filename) as sf: + assert len(sf) == 663 + + # assert test.abc does not exist + assert not os.path.exists(filename) + + +def test_reader_pathlike(): + """ + Assert that path-like objects can be read. + """ + base = Path("shapefiles") + with shapefile.Reader(base / "blockgroups") as sf: + assert len(sf) == 663 + + +def test_reader_dbf_only(): + """ + Assert that specifying just the + dbf argument to the shapefile reader + reads just the dbf file. 
+ """ + with shapefile.Reader(dbf="shapefiles/blockgroups.dbf") as sf: + assert len(sf) == 663 + record = sf.record(3) + assert record[1:3] == ["060750601001", 4715] + + +def test_reader_shp_shx_only(): + """ + Assert that specifying just the + shp and shx argument to the shapefile reader + reads just the shp and shx file. + """ + with shapefile.Reader( + shp="shapefiles/blockgroups.shp", shx="shapefiles/blockgroups.shx" + ) as sf: + assert len(sf) == 663 + shape = sf.shape(3) + assert len(shape.points) == 173 + + +def test_reader_shp_dbf_only(): + """ + Assert that specifying just the + shp and dbf arguments to the shapefile reader + reads just the shp and dbf files. + """ + with shapefile.Reader( + shp="shapefiles/blockgroups.shp", dbf="shapefiles/blockgroups.dbf" + ) as sf: + assert len(sf) == 663 + shape = sf.shape(3) + assert len(shape.points) == 173 + record = sf.record(3) + assert record[1:3] == ["060750601001", 4715] + + +def test_reader_shp_only(): + """ + Assert that specifying just the + shp argument to the shapefile reader + reads just the shp file (shx optional). + """ + with shapefile.Reader(shp="shapefiles/blockgroups.shp") as sf: + assert len(sf) == 663 + shape = sf.shape(3) + assert len(shape.points) == 173 + + +def test_reader_filelike_dbf_only(): + """ + Assert that specifying just the + dbf argument to the shapefile reader + reads just the dbf file. + """ + with shapefile.Reader(dbf=open("shapefiles/blockgroups.dbf", "rb")) as sf: + assert len(sf) == 663 + record = sf.record(3) + assert record[1:3] == ["060750601001", 4715] + + +def test_reader_filelike_shp_shx_only(): + """ + Assert that specifying just the + shp and shx argument to the shapefile reader + reads just the shp and shx file.
+ """ + with shapefile.Reader( + shp=open("shapefiles/blockgroups.shp", "rb"), + shx=open("shapefiles/blockgroups.shx", "rb"), + ) as sf: + assert len(sf) == 663 + shape = sf.shape(3) + assert len(shape.points) == 173 + + +def test_reader_filelike_shp_dbf_only(): + """ + Assert that specifying just the + shp and dbf arguments to the shapefile reader + reads just the shp and dbf files. + """ + with shapefile.Reader( + shp=open("shapefiles/blockgroups.shp", "rb"), + dbf=open("shapefiles/blockgroups.dbf", "rb"), + ) as sf: + assert len(sf) == 663 + shape = sf.shape(3) + assert len(shape.points) == 173 + record = sf.record(3) + assert record[1:3] == ["060750601001", 4715] + + +def test_reader_filelike_shp_only(): + """ + Assert that specifying just the + shp argument to the shapefile reader + reads just the shp file (shx optional). + """ + with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb")) as sf: + assert len(sf) == 663 + shape = sf.shape(3) + assert len(shape.points) == 173 + + +def test_reader_shapefile_delayed_load(): + """ + Assert that a Reader created without any files raises on shape access, + and works once a shapefile is loaded manually with load(). + """ + with shapefile.Reader() as sf: + # assert that data request raises exception, since no file has been provided yet + with pytest.raises(shapefile.ShapefileException): + sf.shape(0) + # assert that works after loading file manually + sf.load("shapefiles/blockgroups") + assert len(sf) == 663 + + +def test_records_match_shapes(): + """ + Assert that the number of records matches + the number of shapes in the shapefile. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + records = sf.records() + shapes = sf.shapes() + assert len(records) == len(shapes) + + +def test_record_attributes(fields=None): + """ + Assert that record retrieves all relevant values and can + be accessed as attributes and dictionary items.
+ """ + # note + # second element in fields matches first element + # in record because records dont have DeletionFlag + with shapefile.Reader("shapefiles/blockgroups") as sf: + for i in range(len(sf)): + # full record + full_record = sf.record(i) + # user-fetched record + if fields is not None: + # only a subset of fields + record = sf.record(i, fields=fields) + else: + # default all fields + record = full_record + fields = [ + field[0] for field in sf.fields[1:] + ] # fieldnames, sans del flag + # check correct length + assert len(record) == len(set(fields)) + # check record values (should be in same order as shapefile fields) + i = 0 + for field in sf.fields: + field_name = field[0] + if field_name in fields: + assert ( + record[i] == record[field_name] == getattr(record, field_name) + ) + i += 1 + + +def test_record_subfields(): + """ + Assert that reader correctly retrieves only a subset + of fields when specified. + """ + fields = ["AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"] + test_record_attributes(fields=fields) + + +def test_record_subfields_unordered(): + """ + Assert that reader correctly retrieves only a subset + of fields when specified, given in random order but + retrieved in the order of the shapefile fields. + """ + fields = sorted(["AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"]) + test_record_attributes(fields=fields) + + +def test_record_subfields_delflag_notvalid(): + """ + Assert that reader does not consider DeletionFlag as a valid field name. + """ + fields = ["DeletionFlag", "AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"] + with pytest.raises(ValueError): + test_record_attributes(fields=fields) + + +def test_record_subfields_duplicates(): + """ + Assert that reader correctly retrieves only a subset + of fields when specified, handling duplicate input fields. 
+ """ + fields = ["AREA", "AREA", "AREA", "MALES", "MALES", "MOBILEHOME"] + test_record_attributes(fields=fields) + # check that only 3 values + with shapefile.Reader("shapefiles/blockgroups") as sf: + rec = sf.record(0, fields=fields) + assert len(rec) == len(set(fields)) + + +def test_record_subfields_empty(): + """ + Assert that reader does not retrieve any fields when given + an empty list. + """ + fields = [] + test_record_attributes(fields=fields) + # check that only 0 values + with shapefile.Reader("shapefiles/blockgroups") as sf: + rec = sf.record(0, fields=fields) + assert len(rec) == 0 + + +def test_record_as_dict(): + """ + Assert that a record object can be converted + into a dictionary and data remains correct. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + record = sf.record(0) + as_dict = record.as_dict() + + assert len(record) == len(as_dict) + for key, value in as_dict.items(): + assert record[key] == value + + +def test_record_oid(): + """ + Assert that the record's oid attribute returns + its index in the shapefile. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + for i in range(len(sf)): + record = sf.record(i) + assert record.oid == i + + for i, record in enumerate(sf.records()): + assert record.oid == i + + for i, record in enumerate(sf.iterRecords()): + assert record.oid == i + + for i, shaperec in enumerate(sf.iterShapeRecords()): + assert shaperec.record.oid == i + + +def test_iterRecords_start_stop(): + """ + Assert that Reader.iterRecords(start, stop) + returns the correct records, as if searched for + by index with Reader.record + """ + + with shapefile.Reader("shapefiles/blockgroups") as sf: + N = len(sf) + + # Arbitrary selection of record indices + # (there are 663 records in blockgroups.dbf). 
+ for i in [ + 0, + 1, + 2, + 3, + 5, + 11, + 17, + 33, + 51, + 103, + 170, + 234, + 435, + 543, + N - 3, + N - 2, + N - 1, + ]: + for record in sf.iterRecords(start=i): + assert record == sf.record(record.oid) + + for record in sf.iterRecords(stop=i): + assert record == sf.record(record.oid) + + for stop in range(i, len(sf)): + # test negative indexing from end, as well as + # positive values of stop, and its default + for stop_arg in (stop, stop - len(sf)): + for record in sf.iterRecords(start=i, stop=stop_arg): + assert record == sf.record(record.oid) + + +def test_shape_oid(): + """ + Assert that the shape's oid attribute returns + its index in the shapefile. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + for i in range(len(sf)): + shape = sf.shape(i) + assert shape.oid == i + + for i, shape in enumerate(sf.shapes()): + assert shape.oid == i + + for i, shape in enumerate(sf.iterShapes()): + assert shape.oid == i + + for i, shaperec in enumerate(sf.iterShapeRecords()): + assert shaperec.shape.oid == i + + +def test_shape_oid_no_shx(): + """ + Assert that the shape's oid attribute returns + its index in the shapefile, when shx file is missing. 
+ """ + basename = "shapefiles/blockgroups" + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") + with shapefile.Reader(shp=shp, dbf=dbf) as sf: + with shapefile.Reader(basename) as sf_expected: + for i in range(len(sf)): + shape = sf.shape(i) + assert shape.oid == i + shape_expected = sf_expected.shape(i) + assert shape.__geo_interface__ == shape_expected.__geo_interface__ + + for i, shape in enumerate(sf.shapes()): + assert shape.oid == i + shape_expected = sf_expected.shape(i) + assert shape.__geo_interface__ == shape_expected.__geo_interface__ + + for i, shape in enumerate(sf.iterShapes()): + assert shape.oid == i + shape_expected = sf_expected.shape(i) + assert shape.__geo_interface__ == shape_expected.__geo_interface__ + + for i, shaperec in enumerate(sf.iterShapeRecords()): + assert shaperec.shape.oid == i + shape_expected = sf_expected.shape(i) + assert ( + shaperec.shape.__geo_interface__ == shape_expected.__geo_interface__ + ) + + +def test_reader_offsets(): + """ + Assert that reader will not read the shx offsets unless necessary, + i.e. requesting a shape index. + """ + basename = "shapefiles/blockgroups" + with shapefile.Reader(basename) as sf: + # shx offsets should not be read during loading + assert not sf._offsets + # reading a shape index should trigger reading offsets from shx file + sf.shape(3) + assert len(sf._offsets) == len(sf.shapes()) + + +def test_reader_offsets_no_shx(): + """ + Assert that reading a shapefile without a shx file will not build + the offsets unless necessary, i.e. reading all the shapes. 
+ """ + basename = "shapefiles/blockgroups" + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") + with shapefile.Reader(shp=shp, dbf=dbf) as sf: + # offsets should not be built during loading + assert not sf._offsets + # reading a shape index should iterate to the shape + # but the list of offsets should remain empty + sf.shape(3) + assert not sf._offsets + # reading all the shapes should build the list of offsets + shapes = sf.shapes() + assert len(sf._offsets) == len(shapes) + + +def test_reader_numshapes(): + """ + Assert that reader reads the numShapes attribute from the + shx file header during loading. + """ + basename = "shapefiles/blockgroups" + with shapefile.Reader(basename) as sf: + # numShapes should be set during loading + assert sf.numShapes is not None + # numShapes should equal the number of shapes + assert sf.numShapes == len(sf.shapes()) + + +def test_reader_numshapes_no_shx(): + """ + Assert that reading a shapefile without a shx file will have + an unknown value for the numShapes attribute (None), and that + reading all the shapes will set the numShapes attribute. + """ + basename = "shapefiles/blockgroups" + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") + with shapefile.Reader(shp=shp, dbf=dbf) as sf: + # numShapes should be unknown due to missing shx file + assert sf.numShapes is None + # numShapes should be set after reading all the shapes + shapes = sf.shapes() + assert sf.numShapes == len(shapes) + + +def test_reader_len(): + """ + Assert that calling len() on reader is equal to length of + all shapes and records. + """ + basename = "shapefiles/blockgroups" + with shapefile.Reader(basename) as sf: + assert len(sf) == len(sf.records()) == len(sf.shapes()) + + +def test_reader_len_not_loaded(): + """ + Assert that calling len() on reader that hasn't loaded a shapefile + yet is equal to 0. 
+ """ + with shapefile.Reader() as sf: + assert len(sf) == 0 + + +def test_reader_len_dbf_only(): + """ + Assert that calling len() on reader when reading a dbf file only, + is equal to length of all records. + """ + basename = "shapefiles/blockgroups" + dbf = open(basename + ".dbf", "rb") + with shapefile.Reader(dbf=dbf) as sf: + assert len(sf) == len(sf.records()) + + +def test_reader_len_no_dbf(): + """ + Assert that calling len() on reader when dbf file is missing, + is equal to length of all shapes. + """ + basename = "shapefiles/blockgroups" + shp = open(basename + ".shp", "rb") + shx = open(basename + ".shx", "rb") + with shapefile.Reader(shp=shp, shx=shx) as sf: + assert len(sf) == len(sf.shapes()) + + +def test_reader_len_no_dbf_shx(): + """ + Assert that calling len() on reader when dbf and shx file is missing, + is equal to length of all shapes. + """ + basename = "shapefiles/blockgroups" + shp = open(basename + ".shp", "rb") + with shapefile.Reader(shp=shp) as sf: + assert len(sf) == len(sf.shapes()) + + +def test_reader_corrupt_files(): + """ + Assert that reader is able to handle corrupt files by + strictly going off the header information. 
+ """ + basename = "shapefiles/test/corrupt_too_long" + + # write a shapefile with junk byte data at end of files + with shapefile.Writer(basename) as w: + w.field("test", "C", 50) + # add 10 line geoms + for _ in range(10): + w.record("value") + w.line([[(1, 1), (1, 2), (2, 2)]]) + # add junk byte data to end of dbf and shp files + w.dbf.write(b"12345") + w.shp.write(b"12345") + + # read the corrupt shapefile and assert that it reads correctly + with shapefile.Reader(basename) as sf: + # assert correct shapefile length metadata + assert len(sf) == sf.numRecords == sf.numShapes == 10 + # assert that records are read without error + assert len(sf.records()) == 10 + # assert that didn't read the extra junk data + stopped = sf.dbf.tell() + sf.dbf.seek(0, 2) + end = sf.dbf.tell() + assert (end - stopped) == 5 + # assert that shapes are read without error + assert len(sf.shapes()) == 10 + # assert that didn't read the extra junk data + stopped = sf.shp.tell() + sf.shp.seek(0, 2) + end = sf.shp.tell() + assert (end - stopped) == 5 + + +def test_bboxfilter_shape(): + """ + Assert that applying the bbox filter to shape() correctly ignores the shape + if it falls outside, and returns it if inside. + """ + inside = [-122.4, 37.8, -122.35, 37.82] + outside = list(inside) + outside[0] *= 10 + outside[2] *= 10 + with shapefile.Reader("shapefiles/blockgroups") as sf: + assert sf.shape(0, bbox=inside) is not None + assert sf.shape(0, bbox=outside) is None + + +def test_bboxfilter_shapes(): + """ + Assert that applying the bbox filter to shapes() correctly ignores shapes + that fall outside, and returns those that fall inside. 
+ """ + bbox = [-122.4, 37.8, -122.35, 37.82] + with shapefile.Reader("shapefiles/blockgroups") as sf: + # apply bbox filter + shapes = sf.shapes(bbox=bbox) + # manually check bboxes + manual = shapefile.Shapes() + for shape in sf.iterShapes(): + if shapefile.bbox_overlap(shape.bbox, bbox): + manual.append(shape) + # compare + assert len(shapes) == len(manual) + # check that they line up + for shape, man in zip(shapes, manual): + assert shape.oid == man.oid + assert shape.__geo_interface__ == man.__geo_interface__ + + +def test_bboxfilter_shapes_outside(): + """ + Assert that applying the bbox filter to shapes() correctly returns + no shapes when the bbox is outside the entire shapefile. + """ + bbox = [-180, 89, -179, 90] + with shapefile.Reader("shapefiles/blockgroups") as sf: + shapes = sf.shapes(bbox=bbox) + assert len(shapes) == 0 + + +def test_bboxfilter_itershapes(): + """ + Assert that applying the bbox filter to iterShapes() correctly ignores shapes + that fall outside, and returns those that fall inside. + """ + bbox = [-122.4, 37.8, -122.35, 37.82] + with shapefile.Reader("shapefiles/blockgroups") as sf: + # apply bbox filter + shapes = list(sf.iterShapes(bbox=bbox)) + # manually check bboxes + manual = shapefile.Shapes() + for shape in sf.iterShapes(): + if shapefile.bbox_overlap(shape.bbox, bbox): + manual.append(shape) + # compare + assert len(shapes) == len(manual) + # check that they line up + for shape, man in zip(shapes, manual): + assert shape.oid == man.oid + assert shape.__geo_interface__ == man.__geo_interface__ + + +def test_bboxfilter_shaperecord(): + """ + Assert that applying the bbox filter to shapeRecord() correctly ignores the shape + if it falls outside, and returns it if inside. 
+ """ + inside = [-122.4, 37.8, -122.35, 37.82] + outside = list(inside) + outside[0] *= 10 + outside[2] *= 10 + with shapefile.Reader("shapefiles/blockgroups") as sf: + # inside + shaperec = sf.shapeRecord(0, bbox=inside) + assert shaperec is not None + assert shaperec.shape.oid == shaperec.record.oid + # outside + assert sf.shapeRecord(0, bbox=outside) is None + + +def test_bboxfilter_shaperecords(): + """ + Assert that applying the bbox filter to shapeRecords() correctly ignores shapes + that fall outside, and returns those that fall inside. + """ + bbox = [-122.4, 37.8, -122.35, 37.82] + with shapefile.Reader("shapefiles/blockgroups") as sf: + # apply bbox filter + shaperecs = sf.shapeRecords(bbox=bbox) + # manually check bboxes + manual = shapefile.ShapeRecords() + for shaperec in sf.iterShapeRecords(): + if shapefile.bbox_overlap(shaperec.shape.bbox, bbox): + manual.append(shaperec) + # compare + assert len(shaperecs) == len(manual) + # check that they line up + for shaperec, man in zip(shaperecs, manual): + # oids + assert shaperec.shape.oid == shaperec.record.oid + # same shape as manual + assert shaperec.shape.oid == man.shape.oid + assert shaperec.shape.__geo_interface__ == man.shape.__geo_interface__ + # same record as manual + assert shaperec.record.oid == man.record.oid + assert shaperec.record == man.record + + +def test_bboxfilter_itershaperecords(): + """ + Assert that applying the bbox filter to iterShapeRecords() correctly ignores shapes + that fall outside, and returns those that fall inside. 
+ """ + bbox = [-122.4, 37.8, -122.35, 37.82] + with shapefile.Reader("shapefiles/blockgroups") as sf: + # apply bbox filter + shaperecs = list(sf.iterShapeRecords(bbox=bbox)) + # manually check bboxes + manual = shapefile.ShapeRecords() + for shaperec in sf.iterShapeRecords(): + if shapefile.bbox_overlap(shaperec.shape.bbox, bbox): + manual.append(shaperec) + # compare + assert len(shaperecs) == len(manual) + # check that they line up + for shaperec, man in zip(shaperecs, manual): + # oids + assert shaperec.shape.oid == shaperec.record.oid + # same shape as manual + assert shaperec.shape.oid == man.shape.oid + assert shaperec.shape.__geo_interface__ == man.shape.__geo_interface__ + # same record as manual + assert shaperec.record.oid == man.record.oid + assert shaperec.record == man.record + + +def test_shaperecords_shaperecord(): + """ + Assert that shapeRecords returns a list of + ShapeRecord objects. + Assert that shapeRecord returns a single + ShapeRecord at the given index. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + shaperecs = sf.shapeRecords() + shaperec = sf.shapeRecord(0) + should_match = shaperecs[0] + + # assert record is equal + assert shaperec.record == should_match.record + + # assert shape is equal + shaperec_json = shaperec.shape.__geo_interface__ + should_match_json = should_match.shape.__geo_interface__ + assert shaperec_json == should_match_json + + +def test_shaperecord_shape(): + """ + Assert that a ShapeRecord object has a shape + attribute that contains shape data. + """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + shaperec = sf.shapeRecord(3) + shape = shaperec.shape + point = shape.points[0] + assert len(point) == 2 + + +def test_shaperecord_record(): + """ + Assert that a ShapeRecord object has a record + attribute that contains record data. 
+ """ + with shapefile.Reader("shapefiles/blockgroups") as sf: + shaperec = sf.shapeRecord(3) + record = shaperec.record + + assert record[1:3] == ["060750601001", 4715] + + +def test_write_field_name_limit(tmpdir): + """ + Abc... + """ + filename = tmpdir.join("test.shp").strpath + with shapefile.Writer(filename) as writer: + writer.field("a" * 5, "C") # many under length limit + writer.field("a" * 9, "C") # 1 under length limit + writer.field("a" * 10, "C") # at length limit + writer.field("a" * 11, "C") # 1 over length limit + writer.field("a" * 20, "C") # many over limit + + with shapefile.Reader(filename) as reader: + fields = reader.fields[1:] + assert len(fields[0][0]) == 5 + assert len(fields[1][0]) == 9 + assert len(fields[2][0]) == 10 + assert len(fields[3][0]) == 10 + assert len(fields[4][0]) == 10 + + +def test_write_shp_only(tmpdir): + """ + Assert that specifying just the + shp argument to the shapefile writer + creates just a shp file. + """ + filename = tmpdir.join("test").strpath + with shapefile.Writer(shp=filename + ".shp") as writer: + writer.point(1, 1) + assert writer.shp and not writer.shx and not writer.dbf + assert writer.shpNum == 1 + assert len(writer) == 1 + assert writer.shp.closed is True + + # assert test.shp exists + assert os.path.exists(filename + ".shp") + + # test that can read shapes + with shapefile.Reader(shp=filename + ".shp") as reader: + assert reader.shp and not reader.shx and not reader.dbf + assert (reader.numRecords, reader.numShapes) == ( + None, + None, + ) # numShapes is unknown in the absence of shx file + assert len(reader.shapes()) == 1 + + # assert test.shx does not exist + assert not os.path.exists(filename + ".shx") + + # assert test.dbf does not exist + assert not os.path.exists(filename + ".dbf") + + +def test_write_shp_shx_only(tmpdir): + """ + Assert that specifying just the shp and + shx argument to the shapefile writer + creates just a shp and shx file. 
+ """ + filename = tmpdir.join("test").strpath + with shapefile.Writer(shp=filename + ".shp", shx=filename + ".shx") as writer: + writer.point(1, 1) + assert writer.shp and writer.shx and not writer.dbf + assert writer.shpNum == 1 + assert len(writer) == 1 + assert writer.shp.closed is writer.shx.closed is True + + # assert test.shp exists + assert os.path.exists(filename + ".shp") + + # assert test.shx exists + assert os.path.exists(filename + ".shx") + + # test that can read shapes and offsets + with shapefile.Reader(shp=filename + ".shp", shx=filename + ".shx") as reader: + assert reader.shp and reader.shx and not reader.dbf + assert (reader.numRecords, reader.numShapes) == (None, 1) + reader.shape(0) # trigger reading of shx offsets + assert len(reader._offsets) == 1 + assert len(reader.shapes()) == 1 + + # assert test.dbf does not exist + assert not os.path.exists(filename + ".dbf") + + +def test_write_shp_dbf_only(tmpdir): + """ + Assert that specifying just the + shp and dbf argument to the shapefile writer + creates just a shp and dbf file. 
+ """ + filename = tmpdir.join("test").strpath + with shapefile.Writer(shp=filename + ".shp", dbf=filename + ".dbf") as writer: + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") + writer.point(1, 1) + assert writer.shp and not writer.shx and writer.dbf + assert writer.shpNum == writer.recNum == 1 + assert len(writer) == 1 + assert writer.shp.closed is writer.dbf.closed is True + + # assert test.shp exists + assert os.path.exists(filename + ".shp") + + # assert test.dbf exists + assert os.path.exists(filename + ".dbf") + + # test that can read records and shapes + with shapefile.Reader(shp=filename + ".shp", dbf=filename + ".dbf") as reader: + assert reader.shp and not reader.shx and reader.dbf + assert (reader.numRecords, reader.numShapes) == ( + 1, + None, + ) # numShapes is unknown in the absence of shx file + assert len(reader.records()) == 1 + assert len(reader.shapes()) == 1 + + # assert test.shx does not exist + assert not os.path.exists(filename + ".shx") + + +def test_write_dbf_only(tmpdir): + """ + Assert that specifying just the + dbf argument to the shapefile writer + creates just a dbf file. 
+ """ + filename = tmpdir.join("test").strpath + with shapefile.Writer(dbf=filename + ".dbf") as writer: + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") + assert not writer.shp and not writer.shx and writer.dbf + assert writer.recNum == 1 + assert len(writer) == 1 + assert writer.dbf.closed is True + + # assert test.dbf exists + assert os.path.exists(filename + ".dbf") + + # test that can read records + with shapefile.Reader(dbf=filename + ".dbf") as reader: + assert not writer.shp and not writer.shx and writer.dbf + assert (reader.numRecords, reader.numShapes) == (1, None) + assert len(reader.records()) == 1 + + # assert test.shp does not exist + assert not os.path.exists(filename + ".shp") + + # assert test.shx does not exist + assert not os.path.exists(filename + ".shx") + + +def test_write_default_shp_shx_dbf(tmpdir): + """ + Assert that creating the shapefile writer without + specifying the shp, shx, or dbf arguments + creates a set of shp, shx, and dbf files. + """ + filename = tmpdir.join("test").strpath + with shapefile.Writer(filename) as writer: + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") + writer.null() + + # assert shp, shx, dbf files exist + assert os.path.exists(filename + ".shp") + assert os.path.exists(filename + ".shx") + assert os.path.exists(filename + ".dbf") + + +def test_write_pathlike(tmpdir): + """ + Assert that path-like objects can be written. + Similar to test_write_default_shp_shx_dbf. + """ + filename = tmpdir.join("test") + assert not isinstance(filename, str) + with shapefile.Writer(filename) as writer: + writer.field("field1", "C") + writer.record("value") + writer.null() + assert (filename + ".shp").ensure() + assert (filename + ".shx").ensure() + assert (filename + ".dbf").ensure() + + +def test_write_filelike(tmpdir): + """ + Assert that file-like objects are written correctly. 
+ """ + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") + with shapefile.Writer(shx=shx, dbf=dbf, shp=shp) as writer: + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") + writer.null() + + # test that filelike objects were written correctly + with shapefile.Reader(shp=shp, shx=shx, dbf=dbf) as reader: + assert len(reader) == 1 + assert reader.shape(0).shapeType == shapefile.NULL + + +def test_write_close_path(tmpdir): + """ + Assert that the Writer close() method + closes the shp, shx, and dbf files + on exit, if given paths. + """ + sf = shapefile.Writer(tmpdir.join("test")) + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") + sf.null() + sf.close() + + assert sf.shp.closed is True + assert sf.dbf.closed is True + assert sf.shx.closed is True + + # test that opens and reads correctly after + with shapefile.Reader(tmpdir.join("test")) as reader: + assert len(reader) == 1 + assert reader.shape(0).shapeType == shapefile.NULL + + +def test_write_close_filelike(tmpdir): + """ + Assert that the Writer close() method + leaves the shp, shx, and dbf files open + on exit, if given filelike objects. 
+ """ + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") + sf = shapefile.Writer(shx=shx, dbf=dbf, shp=shp) + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") + sf.null() + sf.close() + + assert sf.shp.closed is False + assert sf.dbf.closed is False + assert sf.shx.closed is False + + # test that opens and reads correctly after + with shapefile.Reader(shx=shx, dbf=dbf, shp=shp) as reader: + assert len(reader) == 1 + assert reader.shape(0).shapeType == shapefile.NULL + + +def test_write_context_path(tmpdir): + """ + Assert that the Writer context manager + closes the shp, shx, and dbf files + on exit, if given paths. + """ + with shapefile.Writer(tmpdir.join("test")) as sf: + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") + sf.null() + + assert sf.shp.closed is True + assert sf.dbf.closed is True + assert sf.shx.closed is True + + # test that opens and reads correctly after + with shapefile.Reader(tmpdir.join("test")) as reader: + assert len(reader) == 1 + assert reader.shape(0).shapeType == shapefile.NULL + + +def test_write_context_filelike(tmpdir): + """ + Assert that the Writer context manager + leaves the shp, shx, and dbf files open + on exit, if given filelike objects. 
+ """ + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") + with shapefile.Writer(shx=shx, dbf=dbf, shp=shp) as sf: + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") + sf.null() + + assert sf.shp.closed is False + assert sf.dbf.closed is False + assert sf.shx.closed is False + + # test that opens and reads correctly after + with shapefile.Reader(shx=shx, dbf=dbf, shp=shp) as reader: + assert len(reader) == 1 + assert reader.shape(0).shapeType == shapefile.NULL + + +def test_write_shapefile_extension_ignored(tmpdir): + """ + Assert that the filename's extension is + ignored when creating a shapefile. + """ + base = "test" + ext = ".abc" + filename = tmpdir.join(base + ext).strpath + with shapefile.Writer(filename) as writer: + writer.field("field1", "C") # required to create a valid dbf file + + # assert shp, shx, dbf files exist + basepath = tmpdir.join(base).strpath + assert os.path.exists(basepath + ".shp") + assert os.path.exists(basepath + ".shx") + assert os.path.exists(basepath + ".dbf") + + # assert test.abc does not exist + assert not os.path.exists(basepath + ext) + + +def test_write_record(tmpdir): + """ + Test that .record() correctly writes a record using either a list of *args + or a dict of **kwargs. 
+ """ + filename = tmpdir.join("test.shp").strpath + with shapefile.Writer(filename) as writer: + writer.autoBalance = True + + writer.field("one", "C") + writer.field("two", "C") + writer.field("three", "C") + writer.field("four", "C") + + values = ["one", "two", "three", "four"] + writer.record(*values) + writer.record(*values) + + valuedict = dict(zip(values, values)) + writer.record(**valuedict) + writer.record(**valuedict) + + with shapefile.Reader(filename) as reader: + for record in reader.iterRecords(): + assert record == values + + +def test_write_partial_record(tmpdir): + """ + Test that .record() correctly writes a partial record (given only some of the values) + using either a list of *args or a dict of **kwargs. Should fill in the gaps. + """ + filename = tmpdir.join("test.shp").strpath + with shapefile.Writer(filename) as writer: + writer.autoBalance = True + + writer.field("one", "C") + writer.field("two", "C") + writer.field("three", "C") + writer.field("four", "C") + + values = ["one", "two"] + writer.record(*values) + writer.record(*values) + + valuedict = dict(zip(values, values)) + writer.record(**valuedict) + writer.record(**valuedict) + + with shapefile.Reader(filename) as reader: + expected = list(values) + expected.extend(["", ""]) + for record in reader.iterRecords(): + assert record == expected + + assert len(reader.records()) == 4 + + +def test_write_geojson(tmpdir): + """ + Assert that the output of geo interface can be written to json. 
+ """ + filename = tmpdir.join("test").strpath + with shapefile.Writer(filename) as w: + w.field("TEXT", "C") + w.field("NUMBER", "N") + w.field("DATE", "D") + w.record("text", 123, datetime.date(1898, 1, 30)) + w.record("text", 123, [1998, 1, 30]) + w.record("text", 123, "19980130") + w.record("text", 123, "-9999999") # faulty date + w.record(None, None, None) + w.null() + w.null() + w.null() + w.null() + w.null() + + with shapefile.Reader(filename) as r: + for feat in r: + assert json.dumps(feat.__geo_interface__) + assert json.dumps(r.shapeRecords().__geo_interface__) + assert json.dumps(r.__geo_interface__) + + +shape_types = [ + k for k in shapefile.SHAPETYPE_LOOKUP.keys() if k != 31 +] # exclude multipatch + + +@pytest.mark.parametrize("shape_type", shape_types) +def test_write_empty_shapefile(tmpdir, shape_type): + """ + Assert that can write an empty shapefile, for all different shape types. + """ + filename = tmpdir.join("test").strpath + with shapefile.Writer(filename, shapeType=shape_type) as w: + w.field("field1", "C") # required to create a valid dbf file + + with shapefile.Reader(filename) as r: + # test correct shape type + assert r.shapeType == shape_type + # test length 0 + assert len(r) == r.numRecords == r.numShapes == 0 + # test records are empty + assert len(r.records()) == 0 + # test shapes are empty + assert len(r.shapes()) == 0