package geography; import java.util.SortedMap; import java.util.TreeMap; import java.util.Iterator; import java.util.NoSuchElementException; /** Maps from a string like "3400 charles street" to a address nearby * whose coordinates we actually know, such as 3398 N Charles St. * (The known addresses come from StreetSegments added to this AddressFinder.) * We try to make a decent guess even if the user types in "3403 * charle road." or something like that.

* * This process of locating addresses is generally known as geocoding. * We are not solving the whole geocoding problem here. An address finder * for the whole country (or world) would have to locate the city first. * We're just building an address finder for a single city.

* * A really good solution to this might use some kind of approximate * string matching ...

* * What we do currently is quite a bit simpler than that, but still * pretty reasonable. First we look up the street in an * alphabetically ordered dictionary of streets we know about. If we * can't find it exactly, then we pick the closest one. Then we look * up the street number in a numerically ordered dictionary for that * street. Again, if we can't find it exactly, we pick the closest * one.

* * We will use red-black trees for both dictionaries, courtesy of the * java.util.TreeMap class. These implement the java.util.SortedMap * interface (Java's name for an ordered dictionary).

* * Note that we have a nested data structure -- a big dictionary whose * elements are little dictionaries. We use the big dictionary to turn * an approximate street name into a Street object, which contains a * little dictionary that we use to turn an approximate house number * on that street into an address whose location we know.

* * There are a few tricks we use to make the approximate matching work * better:

* *

*/ public class AddressFinder { /** The "big map" that stores Streets, keyed by simplified versions * of their names (see simplify()). The streets are sorted in * their "natural" (alphabetical) order using a comparator. */ protected SortedMap smStreets; // ---------- CONSTRUCTOR AND PUBLIC METHODS ---------- /** Constructor: New address finder, contains no information yet. */ public AddressFinder() { // red-black tree. No explicit comparator supplied, so compareTo() will be used. smStreets = new TreeMap(); } /** In general, a StreetSegment specifies 2 intersections * and up to 4 addresses (up to two per intersection). * Add these addresses to our database of known addresses, * associated with those intersections. */ public void addStreetSegment(StreetSegment ss) { Street street = findOrInsertStreet(ss); street.addIntersection(ss.pStart, ss.housenumLeftStart); street.addIntersection(ss.pStart, ss.housenumRightStart); street.addIntersection(ss.pEnd, ss.housenumLeftEnd); street.addIntersection(ss.pEnd, ss.housenumRightEnd); } /** Number of street names that this AddressFinder knows about. * Substreets and superstreets are both counted. */ public int numKnownStreets() { return smStreets.size(); } /** Finds an intersection near this address. * The address comes from the user and may be grungy; * we'll do our best. If it looks like a valid address * but we just can't find anything remotely close in * the database, we may return null. * * @throws DataFormatException if the address is too grungy * for us to deal with. */ public Intersection findIntersectionNear(String address) throws DataFormatException { address = address.trim(); if (address.equals("")) throw new DataFormatException("Address was empty"); // Pull an initial integer from the front of the address. // Assume it is the house number. If there's no such // integer at the start, we'll try a house number of 0, // which will result in our finding a low-numbered house. int i=0; int housenum=0; for (i=0; i= CHARL and < CHARM. String keyFirst = key.substring(0,len); // e.g., CHARL String keyLast = key.substring(0,len-1) // e.g., CHAR + (char)(key.charAt(len-1)+1); // e.g., L+1 = M; ignore special case of 1+Character.MAX_VALUE SortedMap smSub = ... FILL THIS IN -- 1 LINE, USING METHODS OF smStreets // We can now iterate over the Streets in smSub and pick // out the one with the most addresses, e.g., CHARLESST. // If there are no streets at all in smSub, return null. // (When would this happen??) int mostAddresses = -1; Street bestStreet = null; // might have to return this Iterator iter = ... FILL THIS IN -- 1 LINE, USING METHODS OF smSub while (iter.hasNext()) { Street street = (Street) iter.next(); if (street.numKnownAddresses() > mostAddresses) { mostAddresses = street.numKnownAddresses(); bestStreet = street; } } return bestStreet; } /* Standardize a street name by uppercasing it and removing all * punctuation and spaces. Thus, "north Way" and "Northway" both * become "NORTHWAY". This will be used as a key to store and * look up the street. */ protected static String keyFromStreetName(String streetName) { String s = streetName.toUpperCase(); StringBuffer sb = new StringBuffer(); // initially empty // Add all the letters and digits of s to the end of sb. for (int i=0; i < s.length(); i++) { char c = s.charAt(i); if (Character.isLetterOrDigit(c)) sb.append(c); } return sb.toString(); } // ---------- UTILITY METHODS ---------- /** Look up key in the SortedMap sm. Returns an array of the 2 closest * keys (< and >=). If there is an exact match, these keys will be * equal. If there are no keys < the search key, then the first * element will be null, and symmetrically for >= and the second element. */ protected static Object[] closestKeys(SortedMap sm, Object key) { Object prevKey, nextKey; try { prevKey = ... // biggest key among those < key FILL THIS IN -- 1 OR 2 LINES, USING METHODS OF sm } catch (NoSuchElementException e) { prevKey = null; } try { nextKey = ... // smallest key among those >= key FILL THIS IN -- 1 OR 2 LINES, USING METHODS OF sm } catch (NoSuchElementException e) { nextKey = null; } return new Object[] { prevKey, nextKey }; } /* Length of longest common prefix of two strings. * null is treated as "". */ private static int lcpLength(String s1, String s2) { if (s1==null || s2==null) return 0; int i=0; while (i * * At present, the only real information that we store is the "little map" of * addresses along that street. But this class could be expanded * later, for example to make it possible to look up intersections by * their cross streets ("Charles and 34th St" as well as "3400 Charles * St"). */ protected class Street { /** One arbitrary segment of the street. We keep this around * to get details of the street name. */ StreetSegment ssArbitrary; /* An intersection that findIntersectionNear can return * even if smIntersections is empty. (That might happen if * there are no house numbers on the street. If the * user looks for "Jones Falls Expressway", this is the * intersection we'll return. */ Intersection isctArbitrary; /** A street of which this street is a mere segment. * Null if there is no such street. */ Street superStreet; /** Maps house numbers to Intersections along this street. * The numbers are sorted in their "natural" (numerical) order * using compareTo(). */ SortedMap smIntersections; /** Constructor: New street, no information about it yet. */ public Street(StreetSegment ss) { ssArbitrary = ss; isctArbitrary = new Intersection(ss.pStart, ss.name); StreetSegment ssSuper = ss.makeSuperStreetSegment(); if (ssSuper != null) { // This Street is part of a larger Street. Store a // pointer to that larger Street for later use. This // means finding that larger Street, or creating it // if it doesn't exist yet. // // An instance of an inner class has private access to the // instance of the enclosing class that created it! So we // can call findOrInsertStreet() even though that is a method // of AddressFinder, not of Street. Cool. superStreet = findOrInsertStreet(ssSuper); } // red-black tree. No explicit comparator supplied, so compareTo() will be used. smIntersections = new TreeMap(); } /** Add a new house number near the intersection at point p. * We also do this for the superstreet, if any. * * If the house number is null, then we return immediately without * adding anything. */ public void addIntersection(Point p, Integer housenum) { if (housenum == null) return; Intersection isct = new Intersection(p, housenum+" "+ssArbitrary.name); smIntersections.put(housenum, isct); if (superStreet!=null) superStreet.smIntersections.put(housenum, isct); } /** Finds the intersection closest to the given house number * on this street. If this street doesn't have any known * numbered intersections, try an unnumbered one. */ public Intersection findIntersectionNear(int housenum) { Integer key = new Integer(housenum); Object[] closeKeys = closestKeys(smIntersections, key); int bestIndex; if (closeKeys[0]==null) { if (closeKeys[1]==null) return isctArbitrary; // collection of house numbers is empty else bestIndex=1; } else if (closeKeys[1]==null) { bestIndex=0; } else { // Found two keys - must decide which one is closer to ours int diff0 = housenum - ((Integer)closeKeys[0]).intValue(); // >= 0 int diff1 = ((Integer)closeKeys[1]).intValue() - housenum; // <= 0 // Use whichever one is more closer; // if we still have a tie, arbitrarily pick the first. bestIndex = (diff0 <= diff1 ? 0 : 1); } return (Intersection) smIntersections.get(closeKeys[bestIndex]); } /** Total number of addresses known for this street. */ public int numKnownAddresses() { return smIntersections.size(); } } }