package geography;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Iterator;
import java.util.NoSuchElementException;
/** Maps from a string like "3400 charles street" to a address nearby
* whose coordinates we actually know, such as 3398 N Charles St.
* (The known addresses come from StreetSegments added to this AddressFinder.)
* We try to make a decent guess even if the user types in "3403
* charle road." or something like that.
*
* This process of locating addresses is generally known as geocoding.
* We are not solving the whole geocoding problem here. An address finder
* for the whole country (or world) would have to locate the city first.
* We're just building an address finder for a single city.
*
* A really good solution to this might use some kind of approximate
* string matching ...
*
* What we do currently is quite a bit simpler than that, but still
* pretty reasonable. First we look up the street in an
* alphabetically ordered dictionary of streets we know about. If we
* can't find it exactly, then we pick the closest one. Then we look
* up the street number in a numerically ordered dictionary for that
* street. Again, if we can't find it exactly, we pick the closest
* one.
*
* We will use red-black trees for both dictionaries, courtesy of the
* java.util.TreeMap class. These implement the java.util.SortedMap
* interface (Java's name for an ordered dictionary).
*
* Note that we have a nested data structure -- a big dictionary whose
* elements are little dictionaries. We use the big dictionary to turn
* an approximate street name into a Street object, which contains a
* little dictionary that we use to turn an approximate house number
* on that street into an address whose location we know.
*
* There are a few tricks we use to make the approximate matching work
* better:
*
*
*
* - 3400 Falls Rd is not in the little dictionary for Falls Rd, but
* it is in between 3398 and 3501. We pick the first because it is
* numerically closer.
*
*
- "Falls Avenue" is not in the big dictionary, but it is in between
* Falls Ave and Falls Rd. We pick the former because it has a
* longer prefix in common with "Falls Avenue".
*
*
- If the user types "Charles St" rather than "N Charles St",
* we should still find it. We assume that N Charles St is a kind of
* sub-street of Charles St. So any addresses added to N Charles St
* will also be added to its "super-street", Charles St. The user can
* then find them both ways.
*
*
- If the user just types "Falls" and leaves off the street type,
* it is in between Falkirk Rd and Falls Ave. By a rule
* given above, we should pick Falls Ave -- it has a longer prefix in
* common with "Falls". But in fact what the user probably meant
* was "Falls Rd", which is a major road in Baltimore.
*
* Similarly, if the user just types "Charlas St", it falls between
* Charing Cross Rd and Charlcote Pl. But we would be better
* off picking Charles St, because it is a major road, and it
* has just as long a prefix in common with Charles St as it
* does with Charlcote Pl.
*
* So in fact, we do not limit our choice to just the two keys
* that the search key falls between alphabetically. We consider
* ALL keys that share the longest possible prefix in common with
* the search key (e.g., "Falls" or "Charl"). Those keys form a
* (usually short) contiguous subsequence of the sorted
* dictionary. We choose the biggest street in that
* subsequence.
*
* How do we know which is the biggest street? The one that has
* the most known addresses is the one that the user is most likely
* to be asking about.
*
*
- The user's query may have strange spacing or capitalization:
* e.g., "north Way" instead of "Northway Rd". So we do not
* use the street names directly as keys. Instead, we standardize
* them: the query "north Way" turns into the key "NORTHWAY",
* and "Northway Rd" is stored under the key "NORTHWAYRD"; these are
* similar enough for the query to work.
*
*
- "north charles st" doesn't currently work -- it has to be "n charles st".
* We could expand common abbreviations (Rd, St, N, ...)
* with their expansions (Road, Street, North) as part
* of standardizing the keys. But we don't currently do this.
*
*/
public class AddressFinder {
/** The "big map" that stores Streets, keyed by simplified versions
* of their names (see simplify()). The streets are sorted in
* their "natural" (alphabetical) order using a comparator.
*/
protected SortedMap smStreets;
// ---------- CONSTRUCTOR AND PUBLIC METHODS ----------
/** Constructor: New address finder, contains no information yet. */
public AddressFinder() {
// red-black tree. No explicit comparator supplied, so compareTo() will be used.
smStreets = new TreeMap();
}
/** In general, a StreetSegment specifies 2 intersections
* and up to 4 addresses (up to two per intersection).
* Add these addresses to our database of known addresses,
* associated with those intersections.
*/
public void addStreetSegment(StreetSegment ss) {
Street street = findOrInsertStreet(ss);
street.addIntersection(ss.pStart, ss.housenumLeftStart);
street.addIntersection(ss.pStart, ss.housenumRightStart);
street.addIntersection(ss.pEnd, ss.housenumLeftEnd);
street.addIntersection(ss.pEnd, ss.housenumRightEnd);
}
/** Number of street names that this AddressFinder knows about.
* Substreets and superstreets are both counted.
*/
public int numKnownStreets() {
return smStreets.size();
}
/** Finds an intersection near this address.
* The address comes from the user and may be grungy;
* we'll do our best. If it looks like a valid address
* but we just can't find anything remotely close in
* the database, we may return null.
*
* @throws DataFormatException if the address is too grungy
* for us to deal with.
*/
public Intersection findIntersectionNear(String address) throws DataFormatException {
address = address.trim();
if (address.equals(""))
throw new DataFormatException("Address was empty");
// Pull an initial integer from the front of the address.
// Assume it is the house number. If there's no such
// integer at the start, we'll try a house number of 0,
// which will result in our finding a low-numbered house.
int i=0;
int housenum=0;
for (i=0; i= CHARL and < CHARM.
String keyFirst = key.substring(0,len); // e.g., CHARL
String keyLast = key.substring(0,len-1) // e.g., CHAR
+ (char)(key.charAt(len-1)+1); // e.g., L+1 = M; ignore special case of 1+Character.MAX_VALUE
SortedMap smSub = ...
FILL THIS IN -- 1 LINE, USING METHODS OF smStreets
// We can now iterate over the Streets in smSub and pick
// out the one with the most addresses, e.g., CHARLESST.
// If there are no streets at all in smSub, return null.
// (When would this happen??)
int mostAddresses = -1;
Street bestStreet = null; // might have to return this
Iterator iter = ...
FILL THIS IN -- 1 LINE, USING METHODS OF smSub
while (iter.hasNext()) {
Street street = (Street) iter.next();
if (street.numKnownAddresses() > mostAddresses) {
mostAddresses = street.numKnownAddresses();
bestStreet = street;
}
}
return bestStreet;
}
/* Standardize a street name by uppercasing it and removing all
* punctuation and spaces. Thus, "north Way" and "Northway" both
* become "NORTHWAY". This will be used as a key to store and
* look up the street.
*/
protected static String keyFromStreetName(String streetName) {
String s = streetName.toUpperCase();
StringBuffer sb = new StringBuffer(); // initially empty
// Add all the letters and digits of s to the end of sb.
for (int i=0; i < s.length(); i++) {
char c = s.charAt(i);
if (Character.isLetterOrDigit(c))
sb.append(c);
}
return sb.toString();
}
// ---------- UTILITY METHODS ----------
/** Look up key in the SortedMap sm. Returns an array of the 2 closest
* keys (< and >=). If there is an exact match, these keys will be
* equal. If there are no keys < the search key, then the first
* element will be null, and symmetrically for >= and the second element.
*/
protected static Object[] closestKeys(SortedMap sm, Object key) {
Object prevKey, nextKey;
try {
prevKey = ... // biggest key among those < key
FILL THIS IN -- 1 OR 2 LINES, USING METHODS OF sm
} catch (NoSuchElementException e) {
prevKey = null;
}
try {
nextKey = ... // smallest key among those >= key
FILL THIS IN -- 1 OR 2 LINES, USING METHODS OF sm
} catch (NoSuchElementException e) {
nextKey = null;
}
return new Object[] { prevKey, nextKey };
}
/* Length of longest common prefix of two strings.
* null is treated as "". */
private static int lcpLength(String s1, String s2) {
if (s1==null || s2==null)
return 0;
int i=0;
while (i
*
* At present, the only real information that we store is the "little map" of
* addresses along that street. But this class could be expanded
* later, for example to make it possible to look up intersections by
* their cross streets ("Charles and 34th St" as well as "3400 Charles
* St").
*/
protected class Street {
/** One arbitrary segment of the street. We keep this around
* to get details of the street name.
*/
StreetSegment ssArbitrary;
/* An intersection that findIntersectionNear can return
* even if smIntersections is empty. (That might happen if
* there are no house numbers on the street. If the
* user looks for "Jones Falls Expressway", this is the
* intersection we'll return.
*/
Intersection isctArbitrary;
/** A street of which this street is a mere segment.
* Null if there is no such street.
*/
Street superStreet;
/** Maps house numbers to Intersections along this street.
* The numbers are sorted in their "natural" (numerical) order
* using compareTo().
*/
SortedMap smIntersections;
/** Constructor: New street, no information about it yet. */
public Street(StreetSegment ss) {
ssArbitrary = ss;
isctArbitrary = new Intersection(ss.pStart, ss.name);
StreetSegment ssSuper = ss.makeSuperStreetSegment();
if (ssSuper != null) {
// This Street is part of a larger Street. Store a
// pointer to that larger Street for later use. This
// means finding that larger Street, or creating it
// if it doesn't exist yet.
//
// An instance of an inner class has private access to the
// instance of the enclosing class that created it! So we
// can call findOrInsertStreet() even though that is a method
// of AddressFinder, not of Street. Cool.
superStreet = findOrInsertStreet(ssSuper);
}
// red-black tree. No explicit comparator supplied, so compareTo() will be used.
smIntersections = new TreeMap();
}
/** Add a new house number near the intersection at point p.
* We also do this for the superstreet, if any.
*
* If the house number is null, then we return immediately without
* adding anything.
*/
public void addIntersection(Point p, Integer housenum) {
if (housenum == null) return;
Intersection isct = new Intersection(p, housenum+" "+ssArbitrary.name);
smIntersections.put(housenum, isct);
if (superStreet!=null)
superStreet.smIntersections.put(housenum, isct);
}
/** Finds the intersection closest to the given house number
* on this street. If this street doesn't have any known
* numbered intersections, try an unnumbered one.
*/
public Intersection findIntersectionNear(int housenum) {
Integer key = new Integer(housenum);
Object[] closeKeys = closestKeys(smIntersections, key);
int bestIndex;
if (closeKeys[0]==null) {
if (closeKeys[1]==null)
return isctArbitrary; // collection of house numbers is empty
else
bestIndex=1;
} else if (closeKeys[1]==null) {
bestIndex=0;
}
else {
// Found two keys - must decide which one is closer to ours
int diff0 = housenum - ((Integer)closeKeys[0]).intValue(); // >= 0
int diff1 = ((Integer)closeKeys[1]).intValue() - housenum; // <= 0
// Use whichever one is more closer;
// if we still have a tie, arbitrarily pick the first.
bestIndex = (diff0 <= diff1 ? 0 : 1);
}
return (Intersection) smIntersections.get(closeKeys[bestIndex]);
}
/** Total number of addresses known for this street. */
public int numKnownAddresses() {
return smIntersections.size();
}
}
}