package geography;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Iterator;
import java.util.NoSuchElementException;

/** Maps from a string like "3400 charles street" to a address nearby
 * whose coordinates we actually know, such as 3398 N Charles St.  
 * (The known addresses come from StreetSegments added to this AddressFinder.)
 * We try to make a decent guess even if the user types in "3403
 * charle road." or something like that.<p>
 *
 * This process of locating addresses is generally known as geocoding.
 * We are not solving the whole geocoding problem here.  An address finder
 * for the whole country (or world) would have to locate the city first.
 * We're just building an address finder for a single city.<p>
 *
 * A really good solution to this might use some kind of approximate
 * string matching ...<p>
 *
 * What we do currently is quite a bit simpler than that, but still
 * pretty reasonable.  First we look up the street in an
 * alphabetically ordered dictionary of streets we know about.  If we
 * can't find it exactly, then we pick the closest one.  Then we look
 * up the street number in a numerically ordered dictionary for that
 * street.  Again, if we can't find it exactly, we pick the closest
 * one.<p>
 *
 * We will use red-black trees for both dictionaries, courtesy of the
 * java.util.TreeMap class.  These implement the java.util.SortedMap
 * interface (Java's name for an ordered dictionary).<p>
 * 
 * Note that we have a nested data structure -- a big dictionary whose
 * elements are little dictionaries.  We use the big dictionary to turn
 * an approximate street name into a Street object, which contains a
 * little dictionary that we use to turn an approximate house number
 * on that street into an address whose location we know.<p>
 *
 * There are a few tricks we use to make the approximate matching work
 * better:<p>
 *
 * <ul>
 * 
 * <li>3400 Falls Rd is not in the little dictionary for Falls Rd, but
 *     it is in between 3398 and 3501.  We pick the first because it is 
 *     numerically closer.  <p>
 *
 * <li>"Falls Avenue" is not in the big dictionary, but it is in between
 *     Falls Ave and Falls Rd.  We pick the former because it has a
 *     longer prefix in common with "Falls Avenue".  <p>
 *
 * <li>If the user types "Charles St" rather than "N Charles St",
 *     we should still find it.  We assume that N Charles St is a kind of
 *     sub-street of Charles St.  So any addresses added to N Charles St
 *     will also be added to its "super-street", Charles St.  The user can 
 *     then find them both ways.  <p>
 *
 * <li>If the user just types "Falls" and leaves off the street type,
 *     it is in between Falkirk Rd and Falls Ave.  By a rule
 *     given above, we should pick Falls Ave -- it has a longer prefix in 
 *     common with "Falls".  But in fact what the user probably meant 
 *     was "Falls Rd", which is a major road in Baltimore.  <p>
 *
 *     Similarly, if the user just types "Charlas St", it falls between
 *     Charing Cross Rd and Charlcote Pl.  But we would be better
 *     off picking Charles St, because it is a major road, and it
 *     has just as long a prefix in common with Charles St as it
 *     does with Charlcote Pl.  <p>
 *
 *     So in fact, we do not limit our choice to just the two keys
 *     that the search key falls between alphabetically.  We consider
 *     ALL keys that share the longest possible prefix in common with
 *     the search key (e.g., "Falls" or "Charl").  Those keys form a
 *     (usually short) contiguous subsequence of the sorted
 *     dictionary.  We choose the biggest street in that
 *     subsequence. <p>
 *
 *     How do we know which is the biggest street?  The one that has
 *     the most known addresses is the one that the user is most likely 
 *     to be asking about.<p>
 *
 * <li>The user's query may have strange spacing or capitalization:
 *     e.g., "north Way" instead of "Northway Rd".  So we do not
 *     use the street names directly as keys.  Instead, we standardize
 *     them: the query "north Way" turns into the key "NORTHWAY",
 *     and "Northway Rd" is stored under the key "NORTHWAYRD"; these are
 *     similar enough for the query to work.<p>
 *    
 * <li>"north charles st" doesn't currently work -- it has to be "n charles st".
 *     We could expand common abbreviations (Rd, St, N, ...) 
 *     with their expansions (Road, Street, North) as part
 *     of standardizing the keys.  But we don't currently do this.<p>
 * </ul>
 */

public class AddressFinder {

  /** The "big map" that stores Streets, keyed by simplified versions
   * of their names (see simplify()).  The streets are sorted in
   * their "natural" (alphabetical) order using a comparator.
   */
  protected SortedMap smStreets;

  // ---------- CONSTRUCTOR AND PUBLIC METHODS ----------

  /** Constructor: New address finder, contains no information yet. */
  public AddressFinder() {
    // red-black tree.  No explicit comparator supplied,  so compareTo() will be used.
    smStreets = new TreeMap();     
  }

  /** In general, a StreetSegment specifies 2 intersections
   * and up to 4 addresses (up to two per intersection).
   * Add these addresses to our database of known addresses,
   * associated with those intersections.
   */
  public void addStreetSegment(StreetSegment ss) {
    Street street = findOrInsertStreet(ss);

    street.addIntersection(ss.pStart, ss.housenumLeftStart);
    street.addIntersection(ss.pStart, ss.housenumRightStart);
    street.addIntersection(ss.pEnd,   ss.housenumLeftEnd);
    street.addIntersection(ss.pEnd,   ss.housenumRightEnd);
  }

  /** Number of street names that this AddressFinder knows about.
   * Substreets and superstreets are both counted. 
   */
  public int numKnownStreets() {
    return smStreets.size();
  }

  /** Finds an intersection near this address. 
   * The address comes from the user and may be grungy;
   * we'll do our best.  If it looks like a valid address
   * but we just can't find anything remotely close in 
   * the database, we may return null.
   *
   * @throws DataFormatException if the address is too grungy
   *   for us to deal with.
   */
  public Intersection findIntersectionNear(String address) throws DataFormatException {

    address = address.trim();
    if (address.equals(""))
      throw new DataFormatException("Address was empty");

    // Pull an initial integer from the front of the address.
    // Assume it is the house number.  If there's no such
    // integer at the start, we'll try a house number of 0,
    // which will result in our finding a low-numbered house.

    int i=0;
    int housenum=0;
    for (i=0; i<address.length(); i++) {
      char c = address.charAt(i);
      if (!Character.isDigit(c))
	break;
      // build up housenum using place value in base 10
      housenum = 10*housenum + Character.digit(c,10);  
    }
    
    // Get the rest of string after housenum.  By the way, the kind of
    // string processing we're doing here can be handled much more
    // easily with Java 1.4's java.util.regex package (classes
    // Pattern and Matcher). But some of you are still using Java 1.3.

    String streetName = address.substring(i);  

    // Okay, we're looking for housenum on streetName.
    // Look up and return the best result we can.

    Street street = findStreetLike(streetName);
    if (street==null) 
      return null;
    else 
      return street.findIntersectionNear(housenum);
  }

  // ---------- HELPER METHODS ----------

  /** Given a StreetSegment, look up its Street by name in the database.
   * If there is no such street in the database, create one.
   */
  protected Street findOrInsertStreet(StreetSegment ss) {
    String key = keyFromStreetName(ss.name);
    Street street = (Street) smStreets.get(key);
    if (street == null) {
      street = new Street(ss);
      smStreets.put(key, street);
    }
    return street;
  }

  /** Find the street whose name is most similar to the given name,
   * breaking ties in favor of streets with more addresses.
   * Return null if there are no streets at all in the database.
   */
  protected Street findStreetLike(String name) {
    String key = keyFromStreetName(name);

    // If we can find an exact match, return it immediately.
    //
    // (If the user searched for JONES and we find JONES, then we
    // don't wish to consider other busier streets that also
    // share the first 5 characters with JONES, like JONES FALLS.)

    if (smStreets.containsKey(key))
      return (Street) smStreets.get(key);

    // Okay, no exact match to key. 
    // Find the keys right before and after it in the sorted map.

    Object[] closeKeys = closestKeys(smStreets, key);

    // One of the closeKeys must have the maximum possible prefix
    // in common with key.  
    //
    // Example: CHARLAS falls between CHARINGCROSSRD and CHARLCOTEPL.  
    // So we know that no key in smStreets has more than the 5-letter
    // prefix CHARL in common with it.  (How would you prove this??)

    int len0 = lcpLength(key, (String)closeKeys[0]);  
    int len1 = lcpLength(key, (String)closeKeys[1]);  
    int len = Math.max(len0,len1);

    // Let's get all keys that have that much in common with key.
    //
    // Example: We are now looking for all keys that start with CHARL.
    // These are keys that are >= CHARL and < CHARM.

    String keyFirst = key.substring(0,len);          // e.g., CHARL
    String keyLast = key.substring(0,len-1)          // e.g., CHAR
                      + (char)(key.charAt(len-1)+1); // e.g., L+1 = M; ignore special case of 1+Character.MAX_VALUE

    SortedMap smSub = ...
      FILL THIS IN -- 1 LINE, USING METHODS OF smStreets

    // We can now iterate over the Streets in smSub and pick
    // out the one with the most addresses, e.g., CHARLESST.
    // If there are no streets at all in smSub, return null.
    // (When would this happen??)

    int mostAddresses = -1;    
    Street bestStreet = null;  // might have to return this
    Iterator iter = ...
      FILL THIS IN -- 1 LINE, USING METHODS OF smSub
    while (iter.hasNext()) {
      Street street = (Street) iter.next();
      if (street.numKnownAddresses() > mostAddresses) {
	mostAddresses = street.numKnownAddresses();
	bestStreet = street;
      }
    }
    return bestStreet;
  }

  /* Standardize a street name by uppercasing it and removing all 
   * punctuation and spaces.  Thus, "north Way" and "Northway" both 
   * become "NORTHWAY".  This will be used as a key to store and
   * look up the street.
   */
  protected static String keyFromStreetName(String streetName) {
    String s = streetName.toUpperCase();
    StringBuffer sb = new StringBuffer();  // initially empty

    // Add all the letters and digits of s to the end of sb.
    for (int i=0; i < s.length(); i++) {
      char c = s.charAt(i);
      if (Character.isLetterOrDigit(c))
	sb.append(c);
    }

    return sb.toString();
  }  

  // ---------- UTILITY METHODS ----------

  /** Look up key in the SortedMap sm.  Returns an array of the 2 closest
   * keys (< and >=).  If there is an exact match, these keys will be
   * equal.  If there are no keys < the search key, then the first
   * element will be null, and symmetrically for >= and the second element.
   */
  protected static Object[] closestKeys(SortedMap sm, Object key) {
    Object prevKey, nextKey;
    try { 
      prevKey = ...  // biggest key among those < key
	FILL THIS IN -- 1 OR 2 LINES, USING METHODS OF sm
    } catch (NoSuchElementException e) { 
      prevKey = null;
    }

    try { 
      nextKey = ...  // smallest key among those >= key
	FILL THIS IN -- 1 OR 2 LINES, USING METHODS OF sm
    } catch (NoSuchElementException e) {
      nextKey = null;
    }

    return new Object[] { prevKey, nextKey };
  }

  /* Length of longest common prefix of two strings. 
   * null is treated as "".  */
  private static int lcpLength(String s1, String s2) {
    if (s1==null || s2==null)
      return 0;

    int i=0;
    while (i<s1.length() && i<s2.length() && s1.charAt(i)==s2.charAt(i))
      i++;
    return i;
  }

  // ---------- INNER CLASSES ----------

  /** A class that represents information about a particular street.
   * This class is only used by AddressFinder, and might go away if
   * AddressFinder changed.  So it's convenient to define it right here
   * inside AddressFinder!  This is called an "inner class."  Its full
   * name is actually AddressFinder.Street.<p>
   *
   * At present, the only real information that we store is the "little map" of
   * addresses along that street.  But this class could be expanded
   * later, for example to make it possible to look up intersections by
   * their cross streets ("Charles and 34th St" as well as "3400 Charles
   * St").
   */
  protected class Street {
    /** One arbitrary segment of the street.  We keep this around
     * to get details of the street name.  
     */
    StreetSegment ssArbitrary;

    /* An intersection that findIntersectionNear can return
     * even if smIntersections is empty.  (That might happen if
     * there are no house numbers on the street.  If the
     * user looks for "Jones Falls Expressway", this is the
     * intersection we'll return. 
     */
    Intersection isctArbitrary;

    /** A street of which this street is a mere segment.  
     * Null if there is no such street.
     */
    Street superStreet;

    /** Maps house numbers to Intersections along this street. 
     * The numbers are sorted in their "natural" (numerical) order
     * using compareTo().
     */
    SortedMap smIntersections;

    /** Constructor: New street, no information about it yet. */

    public Street(StreetSegment ss) {
      ssArbitrary = ss;
      isctArbitrary = new Intersection(ss.pStart, ss.name);

      StreetSegment ssSuper = ss.makeSuperStreetSegment();
      if (ssSuper != null) {
	
	// This Street is part of a larger Street.  Store a
	// pointer to that larger Street for later use.  This
	// means finding that larger Street, or creating it
	// if it doesn't exist yet.
	//
	// An instance of an inner class has private access to the
	// instance of the enclosing class that created it!  So we
	// can call findOrInsertStreet() even though that is a method 
	// of AddressFinder, not of Street.  Cool.
	
	superStreet = findOrInsertStreet(ssSuper);
      }

      // red-black tree.  No explicit comparator supplied, so compareTo() will be used.
      smIntersections = new TreeMap();     
    }

    /** Add a new house number near the intersection at point p.  
     * We also do this for the superstreet, if any.
     * 
     * If the house number is null, then we return immediately without
     * adding anything.
     */
    public void addIntersection(Point p, Integer housenum) {
      if (housenum == null) return;

      Intersection isct = new Intersection(p, housenum+" "+ssArbitrary.name);

      smIntersections.put(housenum, isct);
      if (superStreet!=null) 
	superStreet.smIntersections.put(housenum, isct);
    }

    /** Finds the intersection closest to the given house number
     * on this street.  If this street doesn't have any known
     * numbered intersections, try an unnumbered one.  
     */
    public Intersection findIntersectionNear(int housenum) {
      Integer key = new Integer(housenum);
      Object[] closeKeys = closestKeys(smIntersections, key);
      int bestIndex;
      if (closeKeys[0]==null) {
	if (closeKeys[1]==null) 
	  return isctArbitrary;  // collection of house numbers is empty
	else
	  bestIndex=1;
      } else if (closeKeys[1]==null) {
	bestIndex=0;
      }
      else {
	// Found two keys - must decide which one is closer to ours
	int diff0 = housenum - ((Integer)closeKeys[0]).intValue();   // >= 0
	int diff1 = ((Integer)closeKeys[1]).intValue() - housenum;   // <= 0
	// Use whichever one is more closer;
	// if we still have a tie, arbitrarily pick the first.
	bestIndex = (diff0 <= diff1 ? 0 : 1);
      }
      return (Intersection) smIntersections.get(closeKeys[bestIndex]);
    }      

    /** Total number of addresses known for this street. */
    public int numKnownAddresses() {
      return smIntersections.size();
    }
  }
}

