Java, heuristique privilégiant le sommet qui induit le plus grand graphe: 1825 1855 1873
Le code ci-dessous s'exécute en moins de 10 minutes et trouve le chemin suivant:
[wad, wadis, dis, dismay, may, mayfly, flywheels, elsewhere, erecting, ingratiate, ateliers, ersatzes, zest, esthetic, tickled, ledger, germicide, idealizes, zestful, fulling, ingrains, institute, uterine, ineptness, essaying, ingeniously, slyness, essences, cessations, onshore, ores, resoundingly, glycerine, inertness, essay, say, saying, ingenuous, ousted, tediously, sly, slyest, estrogen, genuflecting, ingestion, ionizer, zeros, roses, sesames, mes, meshed, hedonist, isthmuses, sesame, amending, ingredient, entrapment, enthuses, session, ionosphere, erectness, essayist, isthmus, mustaches, hesitatingly, glycogen, generation, ions, onset, settable, blew, lewder, deriding, ingratiates, testicles, lessen, sensitization, ionization, ionizing, ingratiating, ingenious, ouster, terrorizing, ingest, estranges, gesticulating, ingrates, testis, tissue, sue, suede, edelweiss, issuing, ingraining, ingrown, owner, nerdiest, estimation, ionospheres, rescue, cue, cueing, ingesting, ingot, got, gotten, tensor, sorrowing, ingratiated, tedious, ousting, ingratiatingly, glycerin, ringside, identifiable, bleariest, ester, terminological, calibrator, torrent, entraps, apse, pseudonym, nymphomania, niacin, cinema, emailed, led, ledges, gesticulates, testicle, clement, entail, ail, ailment, enter, terrains, inspires, restaurateur, euros, rosiest, estimates, tester, termite, iterator, torture, urethras, raspiest, estimator, tore, oregano, anointment, enthuse, useful, fulfil, filmstrip, riposte, stereotyped, pedicure, urea, readmits, itself, elf, elfin, finagles, lesbians, answerable, bleat, eatery, erythrocytes, testosterone, one, ones, nest, esteemed, medicine, inextricable, blessed, sediment, entry, try, tryout, outsources, cesarians, answered, redressed, seducer, cervical, calumniates, test, establishment, entombment, enthusiastic, tickles, lessens, ensemble, blemishes, hesitant, antic, tick, ickiest, estimable, blemished, hedgehog, hogan, gantlet, letdown, own, ownership, hippest, estates, testates, testiest, establishes, hes, hesitates, testable, bleakest, esthetes, testament, entice, iceberg, erg, ergonomic, microscope, operatives, vestibules, lesser, serenade, adenoidal, dales, lest, estrangement, entrap, raptures, resourceful, fulsome, omen, menswear, earthliest, established, hedges, gestates, testy, styes, yeshivot, voter, terrible, blender, derides, descent, enticed, cedillas, lass, assailable, bleacher, hermit, mite, item, temperas, rash, ashtray, rayon, yonder, dermis, mismanage, agendas, dash, ashy, shy, shyster, terrapins, insatiable, bleeder, derives, vestment, entangle, glen, lengthens, ensconced, ceded, deduced, cedars, arsenic, nice, ice, iced, cedar, daredevil, villa, llamas, masseuse, use, useable, bleach, achievable, bleached, hedonistic, tic, ticker, kerchieves, vessel, sell, ell, elliptic, ticket, kettles, lessee, seeps, epsilon, longboat, oath, atherosclerosis, sisterhood, oodles, lesson, sonatas, tassel, selvage, age, agent, entranced, cedes, descender, deranges, gestures, restraint, interment, enthused, seduced, cedilla, llama, amalgam, gamut, mutable, blend, endear, earthy, thymus, mussel, seltzer, zero, erodes, despot, potful, fulfillment, enthrall, allot, lotus, tussle, sledgehammered, redolent, entrapped, pedestal, talk, alkalis, listen, tended, deductible, bleeped, pedigree, reentered, redistribute, uterus, rustproofed, fed, fedora, oranges, gesundheit, either, herdsman, manes, nestles, lessor, sorrowful, fullback, acknowledges, gestured, redoubtable, blended, deduces, cesareans, answer, werewolves, vesper, perseveres, restructures, reside, ideogram, rammed, meddlesome, omens, ensign, ignores, restrains, insolent, entanglement, entrenchment, enticement, entomological, calligraphy, physical, calico, iconoclast, astringent, entertainment, entrant, antennas, nasty, stymie, miens, enslave, averred, redefine, inexorable, blenched, hedgerow, rowboat, oat, oaten, tend, endears, arson, songwriter, terminable, blent, entreaty, atypical, calypso, psoriasis, sister, term, ermine, ineligible, bleaker, kerosene, enema, emancipator, tormentor, torrider, derailment, entertains, instil, tildes, destine, inelegant, anthropomorphic, hiccup, cupolas, lastingly, glycerol, rollback, acknowledgment, entombed, bedridden, denser, servicewomen, menopause, used, sedatives, vesicle, clearinghouse, user, servant, antipodes, descry, crystalline, inexpedient, enthusiast, astonishment, entirety, etymological, calendared, redbreast, astronomer, merinos, nosedove, overpay, pay, paymaster, termagant, antiaircraft, aftercare, ares, resentful, fulcrum, rumpus, pushcart, artiste, stethoscopes, pesetas, taste, steadfast, astride, ides, destitute, utensil, silvan, vanguard, ardent, entryway, waysides, despair, airdrop, ropes, pestered, redder, derangement, entered, redeemed, medullas, lasagnas, nasal, salsas, sashay, hay, haymow, mow, mowed, wedder, derringer, germane, anemic, microfilmed, media, diatom, tomboys, oyster, terminator, toreador, dorsal, salespeople, pleased, sedater, terabit, bitten, tentacle, clergyman, manifesto, stomach, achoo, hoopla, plaza, azalea, leaven, vendor, dormant, antiparticle, cleared, redraft, afterword, ordains, insufficient, entitlement, entomb, ombudsmen, men, mental, tallyhos, hospice, icecap, cape, aperitif, tiffed, fedoras, rasped, pediatric, rickshaw, hawker, keratin, tinctures, reset, setback, acknowledgement, enthronement, entwine, inexact, actor, torpedos, dosed, sedan, dancer, cerebrum, rumple, plea, leach, ache, cheaper, per, periscopes, pestilent, entreat, eater, terser, serape, ape, apes, pesky, skycap, capped, pederast, astuter, terrace, acetaminophen, henchmen, menopausal, saltcellar, lard, ardor, dormice, icebound, underbrush, ushered, redrew, rewound, underclass, assassin, sinew, newscast, astrologer, gerund, undertaken, ken, kens, ensnared, redcap, cappuccinos, nostrum, rum, rumored, redraw, rawhide, identical, calcine, inertia, tiara, arabesque, queerer, reruns, unsold, oldie, diesel, selectmen, mentored, redden, dental, talon, longhand, and, androgen, genome, omelet, lethal, hallucinogenic, nickname, amen, menhaden, denudes, despaired, redevelop, lope, operas, rasp, aspired, redskin, kindergartens, ensnares, resultant, anthropological, callus, lustful, fulcra, crammed, mediocre, crepes, pesticide, ideas, eastbound, under, derrières, respired, rediscovered, redundant, antihero, erode, ode, odes, described, bedevil, villager, gerrymander, deride, ideograph, aphid, hid, hides, describes, besides, despoil, oilskin, kingdom, dominant, ant, antipasti, stiffens, ensured, redeemer, merchant, antiwar, warped, pederasty, stylus, lush, usher, her, hereafter, terrapin, pinnacle, clerical, caliber, bereave, avenger, geriatric, rickshas, haste, stereoscopes, pester, termini, initiator, tortures, restorer, reran, ransomed, medulla, llanos, nostril, rill, illogical, calif, lifer, fervor, vortex, textures, resister, termed, medieval, valor, lord, ordered, rediscover, verbatim, times, mesdames, mescal, caliper, periscope, opera, erasures, restart, artichokes, kestrel, reliant, antebellum, lumbago, agog, goggle, gleeful, fulfill, illustrator, tor, torque, questionnaires, resumed, mediator, tort, orthodoxy, oxymora, oratorio, riot, iotas, taster, terrific, fiche, checkpoint, interloper, perfumes, mesas, sassafras, rasher, heraldry, drywall, all, allergens, ensnare, area, rearm, armchair, airman, manufactures, resurface, acerbic, bicycle, cleverer, rerun, runt, untidy, idyllic, lichens, ensures, resend, endemic, microchip, hippopotamus, muscatel, telecast, astronaut, autopilot, lot, loth, other, heros, rosin, single, gleamed, mediaeval, valet, lettered, redound, underside, ideological, calliper, perihelia, liaison, sonic, nicknames, messenger, germicides, descendant, antigen, genital, tall, allergen, gentleman, mangos, gossipped, pedicures, resistant, antlered, redeveloped, pedagogical, calligrapher, heroins, inside, idea, deafen, fen, fencer, cerebra, bravuras, rascal, calculus, lusher, herbivores, resins, instill, illicit, citric, ricochet, heterodoxy, oxygen, generic, rice, icebox, box, boxcar, cartography, physique, quell, ellipsis, sis, sisal, sallow, lowbrow, rowel, well, elliptical, calf, alfresco, scow, cow, cowboy, boy, boyfriend, end, endeared, red, redesign, ignoramus, musket, kettledrum, rump, umped, pedlar, larvas, vassal, salmonellas, last, astronomical, calfskin, kingfisher, hereupon, ponchos, hospital, talisman, mantel, telethon, honcho, chomped, pedant, antitoxins, instant, antipastos, tossup, superintend, endangered, redskins, instigator, torpor, portico, icon, conquistador, dormer, merganser, seraphic, hiccuped, pedagogue, guerrillas, laser, sera, eraser, seraph, aphasic, sickbed, bed, bedsores, resign, ignorant, anthropocentric, richer, herdsmen, menu, enures, resuscitator, tornado, ado, adobe, obeisant, anthill, illegal, gallon, longshoremen, menace, ace, acetylene, enemas, mas, mascot, cot, cotton, tonsures, restores, result, ultraviolet, letterbox, boxer, xerography, physiological, calmer, merchantmen, mentor, torus, russet, settee, teenager, gerbil, billfold, old, olden, denatures, resubmit, mitten, ten, tenon, nonchalant, antique, queasy, asymmetric, ricksha, shanghai, haircut, cutups, upsides, descriptor, torpid, pidgin, gins, instep, tepee, peeper, perturb, urbane, anemia, miasmas, mascaras, raspy, spy, spyglass, assures, resonator, tortilla, llano, anon, nontechnical, calabash, ashram, rampart, arthropod, podia, diagram, ramp, amp, amphitheatres, resistor, tortillas, lasagna, gnat, natal, talc, alcoholic, licensee, seemed, medical, calm, almanac, nacho, choreography, phylum, lumbar, barman, mannequins, insures, respires, resound, underarm, armatures, resides, desideratum, tumult, ultrasound, underdog, dogcatcher, herald, alderwoman, mandarins, insecticides, desires, respirator, torrid, rid, rides, descant, anticlimax, maximum, mum, mummer, meringue, guesser, sermon, monogram, ramrod, rodeo, deodorant, antelopes, peso, esophagus, gusset, setups, upshot, hotel, telescope, open, penicillin, lingos, gossip, sip, siphon, honor, normal, maltreat, eaten, tenet, nether, herpes, pesticides, descend, endow, downfall, alleyway, way, waylay, layman, manicures, reshuffle, flea, lea, leash, ashen, henchman, mandolin, linchpins, inscribes, bestow, townspeople, plectrum, rumbas, baste, sternum, numb, umbilici, icicle, cleaver, vertebra, brains, insouciant, antidepressant, anthem, hemoglobin, binocular, largos, gossamer, mermaid, aid, aides, desperado, adopt, opt, optima, imam, mambos, bosun, sun, sunspot, potpourris, risky, sky, skyscraper, perturbed, bedraggle, glee, lee, leech, echo, choreographer, heraldic, dictum, tumid, midday, day, daybed, bedsides, desktop, topknot, notepaper, periodical, calendar, dare, areas, easel, selfsame, amebas, basins, ins, insulin, linnet, nettlesome, omegas, gasp, aspartame, amend, endures, researcher, herbal, balsas, sass, assault, ultimatum, tumor, mortgagor, gores, resort, orthopaedic, dictatorship, hipper, person, sonar, narc, arc, archduke, ukelele, elegant, anther, hereabout, outfox, fox, foxtrot, rotogravures, restaurant, antechamber, beret, retriever, verbena, enamor, morsel, sellout, outmaneuver, vertical, call, allergenic, niche, chessman, mandolins, insipid, pidgins, install, allures, rescind, indignant, antithetical, calicos, cosmonaut, auto, utopia, piano, another, heretical, calk, alkali, alibi, ibis, bistro, troupe, upend, endorser, serviceman, mandarin, rind, inductee, teepee, pee, peekaboo, bootstrap, rape, apertures, resin, singular, larval, valiant, antiperspirant, antipasto, stop, topical, calisthenic, nicer, cervix, vixen, xenophobic, bicep, cephalic, licit, citizenship, hippopotami, amigos, gospel, pellet, letups, upstart, artificer, cerebellum, lumberman, manic, nicknamed, medic, dickie, kielbasas, sash, ash, ashcan, cannon, nonskid, kid, kidnaper, perjures, resolver, vernacular, larkspur, puree, reefer, ferret, retains, insofar, far, fart, artisan, sandbag, bagel, gelatin, tinsel, selectman, manacle, clever, versus, sustains, inscribed, bedpan, pandemic, microprocessor, sorbet, bet, betcha, char, harem, remodel, deli, elicit, citadel, deliver, verandas, dashikis, kisser, servicemen, menthol, holiday, daydreamer, merchantman, manikins, insane, anew, newsprint, interwove, overreach, achieve, even, venom, nomad, mad, madrigal, gala, alarm, armpit, pitchman, manor, northbound, underbid, bidet, detox, toxemia, miasma, smarten, tenderloins, insult, ultra, travel, velvet, veteran, random, domino, inopportune, uneconomic, microbes, bestir, tiro, ironware, arena, enamel, melodramas, mastodon, don, donut, nut, nutmeg, meg, megalopolis, lissom, sombre, breathe, therefrom, romper, performer, merman, mangrove, overshadow, downcast, astir, tiros, rostra, trachea, heaven, ventricle, clergywoman, maneuver, verbal, ballad, ladyship, hippie, pie, piebald, alderman, manatee, teethe, thereupon, poncho, choicer, ceramic, microscopic, picayune, uneaten, tendon, donor, northeast, astound, underpass, assessor, sorghum, hum, human, mantra, trainee, needlepoint, interplay, laywoman, mannikin, kinsman, mantillas, lassie, sieve, ever, verdigris, risen, sensor, sorrel, relabel, belabor, borsch, schlep, leprechauns, unsnap, nap, napkin, kin, kingpin, pinkeye, eyeglass, assemblyman, manikin, kingship, hip, hippos, postpartum, tumbrel, relic, lichee, heehaw, haw, hawser, servicewoman, many, anyhow, howsoever, vertex, text, extra, trap, rap, rapper, periwig, wigwag, wag, wagon, gonorrhea, heave, aver, vermin, minesweeper, perplex, lexicon, congas, gastronomic, microfiche, cheapen, pentathlon, longhair, air, aircraft, aft, aftertaste, stem, tempos, postwar, war, wart, article, clear, earshot, hotshot, hotbed, bedlam, lam, lambkin, kindergarten, tenser, serum, rumor, mortar, tarmac, macabre, breech, echos, hostel, telescopic, pickerel, relay, laypeople, pleas, east, astronomic, micra, crackpot, pot, potato, atom, tombed, bedbug, bugaboo, bootleg, leg, legato, atop, topple, plethora, orangutang, angora, orangutan, tan, tandem, democrat, rat, rattan, tang, angry, gryphon, honeybee, bee, beeswax, waxen, xenon, nonplus, lustre, trellis, lisle, sleepwear, earwig, wig, wigwam, wampum, pummel, melanomas, massacre, cretin, tin, tint, interviewee, wee, weeper, persimmon, monsignori, origin, gingham, ham, hamper, pericardia, diarrhea, heartthrob, rob, robes, besom, sombreros, rosebud, bud, budgerigar, garret, retrodden, denim, nimbus, bus, bushel, helmet, metaphor, horsefly, flypaper, peritonea, near, ear, earlobes, bestowal, wall, allay, layout, outlast, astrakhan, handicapper, perusal, saltpetre, tremor, moribund, undercut, cut, cutoff, off, offal, falcon, con, consul, sultan, tannin, ninepin, pinball, allegro, grommet, metro, trot, rot, rotten, tenpin, pineapple, plectra, transit, sitar, tar, taro, arousal, salmon, moneybag, bagpipe, ipecac, cache, checkout, outrun, runaround, undersea, sea, sear, earache, cherub, rub, rubicund, underpin, pin, pint, intagli, glib, lib, libel, bellyache, cherubim, bimbos, bosuns, unsound, undertow, tow, towel, wellington, ton, tonsil, silicon, concoct, octet, tetrahedra, drachmae, maestri, tripos, possum, sum, sumac, macro, crocus, custom, tom, tomcat, catsup, sup, superstar, tarpaulin, linchpin, pinpoint, intercom, comet, met, metacarpus, pussycat, catastrophe, phenomenon, nonverbal, ballpoint, interurban, bani, animal, malt, altar, tartar, tarot, rotund, undergrad, radio, diocesan, sandbar, bar, barren, renewal, walkout, outstrip, ripen, pen, pencil, cilantro, trout, outran, rancor, corncob, cob, cobra, bra, brag, rag, ragas, gas, gasohol, holdout, output, put, putsch, schwas, was, waste, stereo, reoccur, cur, curb, urban, ban, bantam, tam, tamp, ampul, pullout, outwit, wit, withal, halo, alohas, hasp, asparagus, gusto, stove, overlap, lapel, pelvis, visit, sit, sitcom, compendia, diadem, demigod, god, goddam, dam, dampen, pennon, non, noncom, compel, pelican, cancan, can, cancel, celesta, starlit, lit, litmus, muscat, cat, catnap, naphtha, than, handcar, carpel, pellagra, grammar, mar, mariachi, chichi, chi, chimp, imp, impel, pelvic, vicar, car, caribou, bourgeoisie, siesta, stab, tab, tabu, abut, but, butterfat, fat, fathom, homespun, pun, puns, unsheathe, the, theorem, remove, overtax, tax, taxicab, cab, cabal, balsam, sambas, basal, salamis, missal, salt, altho, tho, thou, housebound, underground, underclassman, man, mannikins, insectivores, resonant, antelope, operator, torn, ornamental, tallow, low, lowered, reddens, enshrine, inefficient, entertainer, nerves, vestiges, gesturing, ingested, tediousness, essentials]
Idées fondamentales
Dans Approximation des chemins et cycles dirigés les plus longs , Bjorklund, Husfeldt et Khanna, Lecture Notes in Computer Science (2004), 222-233, les auteurs proposent que dans les graphiques à expansion clairsemée, un long chemin puisse être trouvé par une recherche gourmande qui sélectionne à chaque pas le voisin de la queue actuelle du chemin qui s'étend sur le plus grand sous-graphe de G ', où G' est le graphique d'origine avec les sommets du chemin actuel supprimés. Je ne suis pas sûr d'un bon moyen de tester si un graphe est un graphe expanseur, mais nous avons certainement affaire à un graphe clairsemé, et puisque son noyau est d'environ 20000 sommets et qu'il a un diamètre de seulement 15, il doit avoir une bonne propriétés d'expansion. J'adopte donc cette heuristique gourmande.
Étant donné un graphique G(V, E)
, nous pouvons trouver combien de sommets sont accessibles à partir de chaque sommet en utilisant Floyd-Warshall dans le Theta(V^3)
temps, ou en utilisant l'algorithme de Johnson dans le Theta(V^2 lg V + VE)
temps. Cependant, je sais que nous avons affaire à un graphique qui a un très grand composant fortement connecté (SCC), alors j'adopte une approche différente. Si nous identifions les SCC en utilisant l'algorithme de Tarjan, nous obtenons également un tri topologique pour le graphique compressé G_c(V_c, E_c)
, qui sera beaucoup plus petit, dans le O(E)
temps. Puisqu'il G_c
s'agit d'un DAG, nous pouvons calculer l'accessibilité G_c
dans le O(V_c^2 + E_c)
temps. (J'ai découvert par la suite que cela est indiqué dans l'exercice 26-2.8 du CLR ).
Étant donné que le facteur dominant dans le temps d'exécution est E
, je l'optimise en insérant des nœuds factices pour les préfixes / suffixes. Donc plutôt que 151 * 64 = 9664 bords des mots finissant -res aux mots commençant res, j'ai 151 bords des mots se terminant par -res à # res # et 64 bords de # res # aux mots commençant par res- .
Et enfin, comme chaque recherche prend environ 4 minutes sur mon ancien PC, j'essaie de combiner les résultats avec les longs trajets précédents. C'est beaucoup plus rapide et c'est ma meilleure solution actuelle.
Code
org/cheddarmonk/math/graph/Graph.java
:
package org.cheddarmonk.math.graph;
import java.util.Set;
public interface Graph<V> {
public Set<V> getAdjacent(V node);
public double getWeight(V from, V to);
}
org/cheddarmonk/math/graph/MutableGraph.java
:
package org.cheddarmonk.math.graph;
import java.util.*;
public class MutableGraph<V> implements Graph<V> {
private Map<V, Map<V, Double>> edgesBySource = new HashMap<V, Map<V, Double>>();
public void addEdge(V from, V to, double weight) {
if (!edgesBySource.containsKey(to)) edgesBySource.put(to, new HashMap<V, Double>());
Map<V, Double> e = edgesBySource.get(from);
if (e == null) edgesBySource.put(from, e = new HashMap<V, Double>());
if (e.containsKey(to)) throw new IllegalArgumentException("There is already an edge between the vertices");
e.put(to, weight);
}
@Override
public Set<V> getAdjacent(V node) {
Map<V, Double> e = edgesBySource.get(node);
if (e == null) throw new IllegalArgumentException("node doesn't appear to be in the graph");
return Collections.unmodifiableSet(e.keySet());
}
@Override
public double getWeight(V from, V to) {
Map<V, Double> e = edgesBySource.get(from);
if (e == null) throw new IllegalArgumentException("from doesn't appear to be in the graph");
if (!edgesBySource.containsKey(to)) throw new IllegalArgumentException("to doesn't appear to be in the graph");
Double c = e.get(to);
return c == null ? 0 : c.doubleValue();
}
}
org/cheddarmonk/math/graph/StronglyConnectedComponents.java
:
package org.cheddarmonk.math.graph;
import java.util.*;
/**
* A helper class for finding the strongly connected components of a graph using Tarjan's algorithm.
* http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
*/
public class StronglyConnectedComponents<V> {
private final Graph<V> g;
private List<Set<V>> topologicallySortedSccs = new ArrayList<Set<V>>();
private final LinkedList<V> S = new LinkedList<V>();
private final Set<V> S2 = new HashSet<V>();
private final Map<V, Integer> index = new HashMap<V, Integer>();
private final Map<V, Integer> lowlink = new HashMap<V, Integer>();
private StronglyConnectedComponents(Graph<V> g) {
this.g = g;
}
private void strongConnect(V v) {
int idx = index.size();
index.put(v, idx);
lowlink.put(v, idx);
S.push(v);
S2.add(v);
for (V w : g.getAdjacent(v)) {
if (!index.containsKey(w)) {
strongConnect(w);
if (lowlink.get(w) < lowlink.get(v)) {
lowlink.put(v, lowlink.get(w));
}
}
else if (S2.contains(w)) {
if (index.get(w) < lowlink.get(v)) {
lowlink.put(v, index.get(w));
}
}
}
if (lowlink.get(v).equals(index.get(v))) {
Set<V> scc = new HashSet<V>();
V w;
do {
w = S.pop();
S2.remove(w);
scc.add(w);
} while (!w.equals(v));
topologicallySortedSccs.add(scc);
}
}
public static <V> List<Set<V>> analyse(Graph<V> g, Set<V> sources) {
if (g == null) throw new IllegalArgumentException("g");
StronglyConnectedComponents<V> scc = new StronglyConnectedComponents<V>(g);
for (V v : sources) {
if (!scc.index.containsKey(v)) {
scc.strongConnect(v);
}
}
return scc.topologicallySortedSccs;
}
}
org/cheddarmonk/ppcg/PPCG.java
:
package org.cheddarmonk.ppcg;
import java.io.*;
import java.util.*;
import org.cheddarmonk.math.graph.*;
public class PPCG44922 {
private static final String path = "/usr/share/dict/words";
private static Set<String> allWords;
private static Graph<String> fullGraph;
public static void main(String[] args) {
loadGraph();
Random rnd = new Random();
rnd.setSeed(8104951619088972997L);
List<String> a = search(rnd);
rnd.setSeed(-265860022884114241L);
List<String> b = search(rnd);
List<String> chain = spliceChains(a, b);
System.out.println(chain.size());
System.out.println(chain);
}
private static List<String> search(Random rnd) {
List<String> chain = new ArrayList<String>();
chain.add(selectOptimalReachabilityCount(fullGraph, allWords, rnd));
while (true) {
String tail = chain.get(chain.size() - 1);
FilteredGraph g = new FilteredGraph(chain);
// We know that tail only has one successor, so skip ahead.
Set<String> candidates = new HashSet<String>(fullGraph.getAdjacent(suffix(tail)));
candidates.removeAll(chain);
if (candidates.isEmpty()) break;
chain.add(selectOptimalReachabilityCount(g, candidates, rnd));
}
Iterator<String> it = chain.iterator();
while (it.hasNext()) {
if (it.next().charAt(0) == '#') it.remove();
}
return chain;
}
private static List<String> spliceChains(List<String> a, List<String> b) {
Set<String> intersect = new HashSet<String>(b);
intersect.retainAll(a);
if (intersect.isEmpty()) return null;
// Splice the longest bits. To avoid cycles, we look for intersection points which have the same set of reached intersection points.
// Thus to get from one to the next we can take either route without violating the unique occurrence of each element in the spliced chain.
Set<String> left = new HashSet<String>();
Set<String> right = new HashSet<String>();
List<String> newChain = new ArrayList<String>();
// NB We assume that either a(0) and b(0) are the same or neither is in intersect.
// This is a safe assumption in practice because they're both "wad".
int idxA = 0, idxB = 0, nextA = 0, nextB = 0;
while (idxA < a.size()) {
nextA++;
while (nextA < a.size() && !intersect.contains(a.get(nextA))) nextA++;
String tailA = nextA < a.size() ? a.get(nextA) : "";
left.add(tailA);
nextB++;
while (nextB < b.size() && !intersect.contains(b.get(nextB))) nextB++;
String tailB = nextB < b.size() ? b.get(nextB) : "";
right.add(tailB);
if (left.equals(right) && tailA.equals(tailB)) {
// We take the longer of idxA to nextA-1 or idxB to nextB - 1.
if (nextA - idxA > nextB - idxB) newChain.addAll(a.subList(idxA, nextA));
else newChain.addAll(b.subList(idxB, nextB));
idxA = nextA;
idxB = nextB;
}
}
if (new HashSet<String>(newChain).size() == newChain.size()) return newChain;
throw new IllegalStateException();
}
private static void loadGraph() {
Set<String> words = new HashSet<String>();
Set<String> prefixes = new HashSet<String>();
Set<String> suffixes = new HashSet<String>();
try {
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
String line;
while ((line = br.readLine()) != null) {
if (line.length() >= 3) {
words.add(line);
prefixes.add(prefix(line));
suffixes.add(suffix(line));
}
}
br.close();
}
catch (IOException ioe) {
throw new RuntimeException(ioe);
}
// Filter down to a core with decent reachability.
prefixes.retainAll(suffixes);
MutableGraph<String> g = new MutableGraph<String>();
Iterator<String> it = words.iterator();
while (it.hasNext()) {
String line = it.next();
if (prefixes.contains(prefix(line)) && prefixes.contains(suffix(line))) {
// In the interests of keeping the number of edges down, I insert fake vertices.
g.addEdge(prefix(line), line, 1);
g.addEdge(line, suffix(line), 1);
}
else it.remove();
}
fullGraph = g;
allWords = Collections.unmodifiableSet(words);
}
private static String prefix(String word) {
return "#" + word.substring(0, 3) + "#";
}
private static String suffix(String word) {
return "#" + word.substring(word.length() - 3, word.length()) + "#";
}
private static <V> Map<V, Integer> reachabilityCount(Graph<V> g, Set<V> sources) {
List<Set<V>> sccs = StronglyConnectedComponents.analyse(g, sources);
int n = sccs.size();
// Within a strongly connected component, each vertex can reach each other vertex.
// Then we need to also take into account the other SCCs which they can reach.
// We can exploit the fact that we already have a topological sort of the DAG of SCCs to do this efficiently.
Map<V, Integer> index = new HashMap<V, Integer>();
for (int i = 0; i < n; i++) {
for (V v : sccs.get(i)) index.put(v, i);
}
BitSet[] reachableSccs = new BitSet[n];
Map<V, Integer> reachabilityCounts = new HashMap<V, Integer>();
for (int i = 0; i < n; i++) {
Set<V> scc = sccs.get(i);
reachableSccs[i] = new BitSet(n);
reachableSccs[i].set(i);
for (V v : scc) {
for (V w : g.getAdjacent(v)) {
int j = index.get(w);
if (j < i) reachableSccs[i].or(reachableSccs[j]);
}
}
int count = 0;
for (int j = reachableSccs[i].nextSetBit(0); j >= 0; j = reachableSccs[i].nextSetBit(j+1)) {
count += sccs.get(j).size();
}
for (V v : scc) {
reachabilityCounts.put(v, count);
}
}
return reachabilityCounts;
}
private static <V extends Comparable<? super V>> V selectOptimalReachabilityCount(Graph<V> g, Set<V> candidates, Random rnd) {
Map<V, Integer> r = reachabilityCount(g, candidates);
int max = 0;
List<V> attaining = new ArrayList<V>();
for (V candidate : candidates) {
int score = r.get(candidate);
if (score > max) {
max = score;
attaining.clear();
}
if (score == max) attaining.add(candidate);
}
return selectRandom(attaining, rnd);
}
private static <T extends Comparable<? super T>> T selectRandom(Collection<T> elts, Random rnd) {
List<T> deterministic = new ArrayList<T>(elts);
Collections.sort(deterministic);
Collections.shuffle(deterministic, rnd);
return deterministic.get(0);
}
private static class FilteredGraph implements Graph<String> {
private final Set<String> filteredVertices;
public FilteredGraph(Collection<String> filteredVertices) {
this.filteredVertices = new HashSet<String>(filteredVertices);
}
@Override
public Set<String> getAdjacent(String node) {
if (filteredVertices.contains(node)) return Collections.emptySet();
Set<String> adj = new HashSet<String>(fullGraph.getAdjacent(node));
adj.removeAll(filteredVertices);
return adj;
}
@Override
public double getWeight(String from, String to) {
throw new RuntimeException("Not used");
}
}
}