Browse Source

Fixed #25184 -- Added support for MaxMind GeoLite2 database format

Flavio Curella 9 years ago
parent
commit
7f0953ce1f

+ 21 - 0
django/contrib/gis/geoip2/__init__.py

@@ -0,0 +1,21 @@
+"""
+This module houses the GeoIP2 object, a wrapper for the MaxMind GeoIP2(R)
+Python API (http://geoip2.readthedocs.org/). It is built on top of the geoip2
+Python package provided by MaxMind, which must be installed.
+
+GeoIP(R) is a registered trademark of MaxMind, Inc.
+
+For IP-based geolocation, this module requires the GeoLite2 Country and City
+datasets, in binary format (CSV will not work!). The datasets may be
+downloaded from MaxMind at http://dev.maxmind.com/geoip/geoip2/geolite2/.
+Grab GeoLite2-Country.mmdb.gz and GeoLite2-City.mmdb.gz, and unzip them in the
+directory corresponding to settings.GEOIP_PATH.
+"""
+# The public names depend on whether the wrapper module (and therefore the
+# geoip2 package it imports) can be loaded.
+try:
+    from .base import GeoIP2, GeoIP2Exception
+except ImportError:
+    HAS_GEOIP2 = False
+    __all__ = ['HAS_GEOIP2']
+else:
+    HAS_GEOIP2 = True
+    __all__ = ['HAS_GEOIP2', 'GeoIP2', 'GeoIP2Exception']

+ 219 - 0
django/contrib/gis/geoip2/base.py

@@ -0,0 +1,219 @@
+import os
+import socket
+
+import geoip2.database
+
+from django.conf import settings
+from django.core.validators import ipv4_re
+from django.utils import six
+from django.utils.ipv6 import is_valid_ipv6_address
+
+from .resources import City, Country
+
+# Resolve each GeoIP setting once at import time, falling back to its default
+# when the project settings do not define it.
+GEOIP_SETTINGS = {
+    name: getattr(settings, name, default)
+    for name, default in (
+        ('GEOIP_PATH', None),
+        ('GEOIP_CITY', 'GeoLite2-City.mmdb'),
+        ('GEOIP_COUNTRY', 'GeoLite2-Country.mmdb'),
+    )
+}
+
+
+class GeoIP2Exception(Exception):
+    """Error raised by the GeoIP2 wrapper for bad options or unusable databases."""
+
+
+class GeoIP2(object):
+    """
+    Wrapper around the ``geoip2.database.Reader`` objects opened for the
+    country and/or city databases.
+
+    Queries may be IP addresses or host names; host names are resolved with
+    ``socket.gethostbyname()`` before the database lookup.
+    """
+    # The flags for GeoIP memory caching.
+    # Try MODE_MMAP_EXT, MODE_MMAP, MODE_FILE in that order.
+    MODE_AUTO = 0
+    # Use the C extension with memory map.
+    MODE_MMAP_EXT = 1
+    # Read from memory map. Pure Python.
+    MODE_MMAP = 2
+    # Read database as standard file. Pure Python.
+    MODE_FILE = 4
+    # Load database into memory. Pure Python.
+    MODE_MEMORY = 8
+    # Only the keys matter: they are the accepted values of ``cache``.
+    cache_options = {opt: None for opt in (0, 1, 2, 4, 8)}
+
+    # Paths to the city & country binary databases.
+    _city_file = ''
+    _country_file = ''
+
+    # Readers for the databases; None until a database has been opened.
+    _city = None
+    _country = None
+
+    def __init__(self, path=None, cache=0, country=None, city=None):
+        """
+        Initialize the GeoIP object. No parameters are required to use default
+        settings. Keyword arguments may be passed in to customize the locations
+        of the GeoIP datasets.
+
+        * path: Base directory to where GeoIP data is located or the full path
+            to where the city or country data files (*.mmdb) are located.
+            Assumes that both the city and country data sets are located in
+            this directory; overrides the GEOIP_PATH setting.
+
+        * cache: The cache settings when opening up the GeoIP datasets. May be
+            an integer in (0, 1, 2, 4, 8) corresponding to the MODE_AUTO,
+            MODE_MMAP_EXT, MODE_MMAP, MODE_FILE, and MODE_MEMORY modes,
+            respectively. Defaults to 0, meaning MODE_AUTO.
+
+        * country: The name of the GeoIP country data file. Defaults to
+            'GeoLite2-Country.mmdb'; overrides the GEOIP_COUNTRY setting.
+
+        * city: The name of the GeoIP city data file. Defaults to
+            'GeoLite2-City.mmdb'; overrides the GEOIP_CITY setting.
+
+        Raises GeoIP2Exception if the cache option is invalid, if no path is
+        available, if the path is neither a file nor a directory, or if the
+        type of a database given by full path isn't recognized. Raises
+        TypeError if the path isn't a string.
+        """
+        # Checking the given cache option.
+        if cache in self.cache_options:
+            self._cache = cache
+        else:
+            raise GeoIP2Exception('Invalid GeoIP caching option: %s' % cache)
+
+        # Getting the GeoIP data path.
+        if not path:
+            path = GEOIP_SETTINGS['GEOIP_PATH']
+            if not path:
+                raise GeoIP2Exception('GeoIP path must be provided via parameter or the GEOIP_PATH setting.')
+        if not isinstance(path, six.string_types):
+            raise TypeError('Invalid path type: %s' % type(path).__name__)
+
+        if os.path.isdir(path):
+            # Constructing the GeoIP database filenames using the settings
+            # dictionary. If the database files for the GeoLite country
+            # and/or city datasets exist, then try to open them. A missing
+            # file is not an error here; the matching reader stays None.
+            country_db = os.path.join(path, country or GEOIP_SETTINGS['GEOIP_COUNTRY'])
+            if os.path.isfile(country_db):
+                self._country = geoip2.database.Reader(country_db, mode=cache)
+                self._country_file = country_db
+
+            city_db = os.path.join(path, city or GEOIP_SETTINGS['GEOIP_CITY'])
+            if os.path.isfile(city_db):
+                self._city = geoip2.database.Reader(city_db, mode=cache)
+                self._city_file = city_db
+        elif os.path.isfile(path):
+            # Otherwise, some detective work will be needed to figure out
+            # whether the given database path is for the GeoIP country or city
+            # databases.
+            reader = geoip2.database.Reader(path, mode=cache)
+            db_type = reader.metadata().database_type
+
+            if db_type.endswith('City'):
+                # GeoLite City database detected.
+                self._city = reader
+                self._city_file = path
+            elif db_type.endswith('Country'):
+                # GeoIP Country database detected.
+                self._country = reader
+                self._country_file = path
+            else:
+                # The reader is not stored anywhere, so close it here rather
+                # than leaking the open database.
+                reader.close()
+                raise GeoIP2Exception('Unable to recognize database edition: %s' % db_type)
+        else:
+            raise GeoIP2Exception('GeoIP path must be a valid file or directory.')
+
+    @property
+    def _reader(self):
+        "Return the country reader if one is open, otherwise the city reader."
+        if self._country:
+            return self._country
+        else:
+            return self._city
+
+    @property
+    def _country_or_city(self):
+        """
+        Return the lookup method used for country queries: the country
+        reader's ``country`` if that reader is open, otherwise the city
+        reader's ``city``.
+        """
+        if self._country:
+            return self._country.country
+        else:
+            return self._city.city
+
+    def __del__(self):
+        # Cleanup any GeoIP file handles lying around. Both readers may be
+        # open at the same time (directory mode), so close each of them and
+        # not just the one ``_reader`` would pick.
+        for reader in (self._country, self._city):
+            if reader:
+                reader.close()
+
+    def _check_query(self, query, country=False, city=False, city_or_country=False):
+        """
+        Helper routine for checking the query and database availability.
+
+        Raises TypeError if the query isn't a string and GeoIP2Exception if
+        the database required by the given flags isn't open. Returns the query
+        as an IP address.
+        """
+        # Making sure a string was passed in for the query.
+        if not isinstance(query, six.string_types):
+            raise TypeError('GeoIP query must be a string, not type %s' % type(query).__name__)
+
+        # Extra checks for the existence of country and city databases.
+        if city_or_country and not (self._country or self._city):
+            raise GeoIP2Exception('Invalid GeoIP country and city data files.')
+        elif country and not self._country:
+            raise GeoIP2Exception('Invalid GeoIP country data file: %s' % self._country_file)
+        elif city and not self._city:
+            raise GeoIP2Exception('Invalid GeoIP city data file: %s' % self._city_file)
+
+        # GeoIP2 only takes IP addresses, so anything that doesn't look like
+        # an IPv4 or IPv6 address is treated as a host name and resolved.
+        if not (ipv4_re.match(query) or is_valid_ipv6_address(query)):
+            query = socket.gethostbyname(query)
+
+        return query
+
+    def city(self, query):
+        """
+        Return a dictionary of city information for the given IP address or
+        Fully Qualified Domain Name (FQDN). Some information in the dictionary
+        may be undefined (None).
+        """
+        enc_query = self._check_query(query, city=True)
+        return City(self._city.city(enc_query))
+
+    def country_code(self, query):
+        "Return the country code for the given IP Address or FQDN."
+        enc_query = self._check_query(query, city_or_country=True)
+        return self.country(enc_query)['country_code']
+
+    def country_name(self, query):
+        "Return the country name for the given IP Address or FQDN."
+        enc_query = self._check_query(query, city_or_country=True)
+        return self.country(enc_query)['country_name']
+
+    def country(self, query):
+        """
+        Return a dictionary with the country code and name when given an
+        IP address or a Fully Qualified Domain Name (FQDN). For example, both
+        '24.124.1.80' and 'djangoproject.com' are valid parameters.
+        """
+        # Returning the country code and name
+        enc_query = self._check_query(query, city_or_country=True)
+        return Country(self._country_or_city(enc_query))
+
+    # #### Coordinate retrieval routines ####
+    def coords(self, query, ordering=('longitude', 'latitude')):
+        """
+        Return a tuple with the values of the city dictionary for the given
+        query, picked in the order of the keys in `ordering`.
+        """
+        cdict = self.city(query)
+        if cdict is None:
+            return None
+        else:
+            return tuple(cdict[o] for o in ordering)
+
+    def lon_lat(self, query):
+        "Return a tuple of the (longitude, latitude) for the given query."
+        return self.coords(query)
+
+    def lat_lon(self, query):
+        "Return a tuple of the (latitude, longitude) for the given query."
+        return self.coords(query, ('latitude', 'longitude'))
+
+    def geos(self, query):
+        "Return a GEOS Point object for the given query."
+        ll = self.lon_lat(query)
+        if ll:
+            # Imported here so the rest of the class works without GEOS.
+            from django.contrib.gis.geos import Point
+            return Point(ll, srid=4326)
+        else:
+            return None
+
+    # #### GeoIP Database Information Routines ####
+    @property
+    def info(self):
+        """
+        Return a string with the binary format version found in the metadata
+        of the database behind `_reader`.
+        """
+        meta = self._reader.metadata()
+        return 'GeoIP Library:\n\t%s.%s\n' % (meta.binary_format_major_version, meta.binary_format_minor_version)
+
+    @classmethod
+    def open(cls, full_path, cache):
+        """
+        Alternate constructor taking the path and the cache option
+        positionally. Uses `cls` so that subclasses get an instance of
+        themselves.
+        """
+        return cls(full_path, cache)

+ 18 - 0
django/contrib/gis/geoip2/resources.py

@@ -0,0 +1,18 @@
+def City(response):
+    """
+    Flatten a city lookup response into a plain dictionary.
+
+    'region' is the iso_code of the first subdivision, or None when the
+    response has no subdivisions.
+    """
+    country = response.country
+    location = response.location
+    subdivisions = response.subdivisions
+    if len(subdivisions):
+        region = subdivisions[0].iso_code
+    else:
+        region = None
+    return {
+        'city': response.city.name,
+        'country_code': country.iso_code,
+        'country_name': country.name,
+        'dma_code': location.metro_code,
+        'latitude': location.latitude,
+        'longitude': location.longitude,
+        'postal_code': response.postal.code,
+        'region': region,
+    }
+
+
+def Country(response):
+    "Flatten a lookup response into a dictionary with the country code and name."
+    country = response.country
+    return {'country_code': country.iso_code, 'country_name': country.name}

+ 2 - 0
docs/internals/contributing/writing-code/unit-tests.txt

@@ -138,6 +138,7 @@ dependencies:
 
 *  bcrypt_
 *  docutils_
+*  geoip2_
 *  jinja2_ 2.7+
 *  numpy_
 *  Pillow_
@@ -170,6 +171,7 @@ associated tests will be skipped.
 
 .. _bcrypt: https://pypi.python.org/pypi/bcrypt
 .. _docutils: https://pypi.python.org/pypi/docutils
+.. _geoip2: https://pypi.python.org/pypi/geoip2
 .. _jinja2: https://pypi.python.org/pypi/jinja2
 .. _numpy: https://pypi.python.org/pypi/numpy
 .. _Pillow: https://pypi.python.org/pypi/Pillow/

+ 173 - 0
docs/ref/contrib/gis/geoip2.txt

@@ -0,0 +1,173 @@
+=======================
+Geolocation with GeoIP2
+=======================
+
+.. module:: django.contrib.gis.geoip2
+   :synopsis: Python interface for MaxMind's GeoIP2 databases.
+
+.. versionadded:: 1.9
+
+The :class:`GeoIP2` object is a wrapper for the `MaxMind geoip2 Python
+library`__. [#]_
+
+In order to perform IP-based geolocation, the :class:`GeoIP2` object requires
+the `geoip2 Python library`__ and the GeoIP `Country` and/or `City` `datasets
+in binary format`__ (the CSV files will not work!). Grab the
+``GeoLite2-Country.mmdb.gz`` and ``GeoLite2-City.mmdb.gz`` files and unzip them
+in a directory corresponding to the :setting:`GEOIP_PATH` setting.
+
+Additionally, it is recommended to install the `libmaxminddb C library`__, so
+that ``geoip2`` can leverage the C library's faster speed.
+
+__ http://geoip2.readthedocs.org/
+__ https://pypi.python.org/pypi/geoip2
+__ http://dev.maxmind.com/geoip/geoip2/geolite2/
+__ https://github.com/maxmind/libmaxminddb
+
+Example
+=======
+
+Here is an example of its usage::
+
+     >>> from django.contrib.gis.geoip2 import GeoIP2
+     >>> g = GeoIP2()
+     >>> g.country('google.com')
+     {'country_code': 'US', 'country_name': 'United States'}
+     >>> g.city('72.14.207.99')
+     {'city': 'Mountain View',
+     'country_code': 'US',
+     'country_name': 'United States',
+     'dma_code': 807,
+     'latitude': 37.419200897216797,
+     'longitude': -122.05740356445312,
+     'postal_code': '94043',
+     'region': 'CA'}
+     >>> g.lat_lon('salon.com')
+     (39.0437, -77.4875)
+     >>> g.lon_lat('uh.edu')
+     (-95.4342, 29.834)
+     >>> g.geos('24.124.1.80').wkt
+     'POINT (-97.0000000000000000 38.0000000000000000)'
+
+``GeoIP`` Settings
+==================
+
+.. setting:: GEOIP_PATH
+
+GEOIP_PATH
+----------
+
+A string specifying the directory where the GeoIP data files are
+located. This setting is *required* unless the location is specified manually
+with the ``path`` keyword when initializing the :class:`GeoIP2` object.
+
+.. setting:: GEOIP_COUNTRY
+
+GEOIP_COUNTRY
+-------------
+
+The basename to use for the GeoIP country data file. Defaults to
+``'GeoLite2-Country.mmdb'``.
+
+.. setting:: GEOIP_CITY
+
+GEOIP_CITY
+----------
+
+The basename to use for the GeoIP city data file. Defaults to
+``'GeoLite2-City.mmdb'``.
+
+``GeoIP`` API
+=============
+
+.. class:: GeoIP2(path=None, cache=0, country=None, city=None)
+
+The ``GeoIP`` object does not require any parameters to use the default
+settings. However, at the very least the :setting:`GEOIP_PATH` setting
+should be set with the path of the location of your GeoIP datasets. The
+following initialization keywords may be used to customize any of the
+defaults.
+
+===================  =======================================================
+Keyword Arguments    Description
+===================  =======================================================
+``path``             Base directory to where GeoIP data is located or the
+                     full path to where the city or country data files
+                     (``.mmdb``) are located. Assumes that both the city and
+                     country datasets are located in this directory;
+                     overrides the :setting:`GEOIP_PATH` setting.
+
+``cache``            The cache settings when opening up the GeoIP datasets. May
+                     be an integer in (0, 1, 2, 4, 8) corresponding to the
+                     ``MODE_AUTO``, ``MODE_MMAP_EXT``, ``MODE_MMAP``,
+                     ``MODE_FILE``, and ``MODE_MEMORY`` modes, respectively.
+                     Defaults to 0 (``MODE_AUTO``).
+
+``country``          The name of the GeoIP country data file. Defaults
+                     to ``GeoLite2-Country.mmdb``. Setting this keyword
+                     overrides the :setting:`GEOIP_COUNTRY` setting.
+
+``city``             The name of the GeoIP city data file. Defaults to
+                     ``GeoLite2-City.mmdb``. Setting this keyword overrides
+                     the :setting:`GEOIP_CITY` setting.
+===================  =======================================================
+
+``GeoIP`` Methods
+=================
+
+Instantiating
+-------------
+
+.. classmethod:: GeoIP2.open(path, cache)
+
+This classmethod instantiates the GeoIP object from the given database path
+and given cache setting.
+
+Querying
+--------
+
+All the following querying routines may take either a string IP address
+or a fully qualified domain name (FQDN). For example, both
+``'205.186.163.125'`` and ``'djangoproject.com'`` would be valid query
+parameters.
+
+.. method:: GeoIP2.city(query)
+
+Returns a dictionary of city information for the given query. Some
+of the values in the dictionary may be undefined (``None``).
+
+.. method:: GeoIP2.country(query)
+
+Returns a dictionary with the country code and country name for the given
+query.
+
+.. method:: GeoIP2.country_code(query)
+
+Returns the country code corresponding to the query.
+
+.. method:: GeoIP2.country_name(query)
+
+Returns the country name corresponding to the query.
+
+Coordinate Retrieval
+--------------------
+
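+.. method:: GeoIP2.coords(query, ordering=('longitude', 'latitude'))
+
+Returns a coordinate tuple for the query, in the order given by ``ordering``;
+by default this is (longitude, latitude).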
+
+.. method:: GeoIP2.lon_lat(query)
+
+Returns a coordinate tuple of (longitude, latitude).
+
+.. method:: GeoIP2.lat_lon(query)
+
+Returns a coordinate tuple of (latitude, longitude).
+
+.. method:: GeoIP2.geos(query)
+
+Returns a :class:`~django.contrib.gis.geos.Point` object corresponding to the
+query.
+
+.. rubric:: Footnotes
+.. [#] GeoIP(R) is a registered trademark of MaxMind, Inc.

+ 1 - 0
docs/ref/contrib/gis/index.txt

@@ -23,6 +23,7 @@ of spatially enabled data.
    geos
    gdal
    geoip
+   geoip2
    utils
    commands
    admin

+ 3 - 0
docs/releases/1.9.txt

@@ -214,6 +214,9 @@ Minor features
   raster into a different spatial reference system by specifying a target
   ``srid``.
 
+* The new :class:`~django.contrib.gis.geoip2.GeoIP2` class allows using
+  MaxMind's GeoLite2 databases, which include support for IPv6 addresses.
+
 :mod:`django.contrib.messages`
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

+ 139 - 0
tests/gis_tests/test_geoip2.py

@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import os
+import unittest
+from unittest import skipUnless
+
+from django.conf import settings
+from django.contrib.gis.geoip2 import HAS_GEOIP2
+from django.contrib.gis.geos import HAS_GEOS, GEOSGeometry
+from django.utils import six
+
+if HAS_GEOIP2:
+    from django.contrib.gis.geoip2 import GeoIP2, GeoIP2Exception
+
+
+# Note: Requires both the GeoIP country and city datasets.
+# The GEOIP_PATH setting should be the only setting set (the directory
+# should contain links or the actual database files 'GeoLite2-Country.mmdb'
+# and 'GeoLite2-City.mmdb').
+@skipUnless(
+    HAS_GEOIP2 and getattr(settings, "GEOIP_PATH", None),
+    "GeoIP is required along with the GEOIP_PATH setting."
+)
+class GeoIPTest(unittest.TestCase):
+    "Tests for the GeoIP2 wrapper, run against the databases in GEOIP_PATH."
+    addr = '128.249.1.1'
+    fqdn = 'tmc.edu'
+
+    def _assert_us_country(self, g, query):
+        "Check all three country lookups of `g` for a query expected in the US."
+        self.assertEqual(
+            'US',
+            g.country_code(query),
+            'Failed for func country_code and query %s' % query
+        )
+        self.assertEqual(
+            'United States',
+            g.country_name(query),
+            'Failed for func country_name and query %s' % query
+        )
+        self.assertEqual(
+            {'country_code': 'US', 'country_name': 'United States'},
+            g.country(query)
+        )
+
+    def test01_init(self):
+        "GeoIP initialization."
+        path = settings.GEOIP_PATH
+        instances = (
+            GeoIP2(),  # Everything inferred from GeoIP path
+            GeoIP2(path, 0),  # Passing in data path explicitly.
+            GeoIP2.open(path, 0),  # MaxMind Python API syntax.
+        )
+        for g in instances:
+            self.assertTrue(g._country)
+            self.assertTrue(g._city)
+
+        # Only passing in the location of one database.
+        city_only = GeoIP2(os.path.join(path, 'GeoLite2-City.mmdb'), country='')
+        self.assertIsNone(city_only._country)
+        country_only = GeoIP2(os.path.join(path, 'GeoLite2-Country.mmdb'), city='')
+        self.assertIsNone(country_only._city)
+
+        # Improper parameters: never valid as a cache option; as a path, a
+        # string is rejected as a bad location and anything else as a bad type.
+        for bad in (23, 'foo', 15.23):
+            self.assertRaises(GeoIP2Exception, GeoIP2, cache=bad)
+            expected = GeoIP2Exception if isinstance(bad, six.string_types) else TypeError
+            self.assertRaises(expected, GeoIP2, bad, 0)
+
+    def test02_bad_query(self):
+        "GeoIP query parameter checking."
+        country_only = GeoIP2(city='<foo>')
+        # No city database available, these calls should fail.
+        for lookup in (country_only.city, country_only.coords):
+            self.assertRaises(GeoIP2Exception, lookup, 'tmc.edu')
+
+        # Non-string query should raise TypeError
+        self.assertRaises(TypeError, country_only.country_code, 17)
+        self.assertRaises(TypeError, country_only.country_name, GeoIP2)
+
+    def test03_country(self):
+        "GeoIP country querying methods."
+        g = GeoIP2(city='<foo>')
+
+        for query in (self.fqdn, self.addr):
+            self._assert_us_country(g, query)
+
+    @skipUnless(HAS_GEOS, "Geos is required")
+    def test04_city(self):
+        "GeoIP city querying methods."
+        g = GeoIP2(country='<foo>')
+        expected_lon, expected_lat = (-95.4010, 29.7079)
+
+        for query in (self.fqdn, self.addr):
+            # Country queries should still work.
+            self._assert_us_country(g, query)
+
+            # City information dictionary.
+            info = g.city(query)
+            self.assertEqual('US', info['country_code'])
+            self.assertEqual('Houston', info['city'])
+            self.assertEqual('TX', info['region'])
+
+            point = g.geos(query)
+            self.assertIsInstance(point, GEOSGeometry)
+            # lat_lon() is flipped so every tuple below is (lon, lat).
+            flipped = tuple(reversed(g.lat_lon(query)))
+            for tup in (point.tuple, g.coords(query), g.lon_lat(query), flipped):
+                self.assertAlmostEqual(expected_lon, tup[0], 4)
+                self.assertAlmostEqual(expected_lat, tup[1], 4)
+
+    def test05_unicode_response(self):
+        "GeoIP strings should be properly encoded (#16553)."
+        g = GeoIP2()
+        city_info = g.city("duesseldorf.de")
+        self.assertEqual('Düsseldorf', city_info['city'])
+        country_info = g.country('200.26.205.1')
+        # Some databases have only unaccented countries
+        self.assertIn(country_info['country_name'], ('Curaçao', 'Curacao'))
+
+    def test06_ipv6_query(self):
+        "GeoIP can lookup IPv6 addresses."
+        g = GeoIP2()
+        info = g.city('2002:81ed:c9a5::81ed:c9a5')  # IPv6 address for www.nhm.ku.edu
+        self.assertEqual('US', info['country_code'])
+        self.assertEqual('Lawrence', info['city'])
+        self.assertEqual('KS', info['region'])

+ 1 - 0
tests/requirements/base.txt

@@ -1,5 +1,6 @@
 bcrypt
 docutils
+geoip2
 jinja2 >= 2.7
 numpy
 Pillow