Skip to content

Fix connect address failures #8272

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 13 additions & 95 deletions connectd/connectd.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,52 +78,12 @@ static void try_connect_one_addr(struct connecting *connect);
* timer to call a higher function again, so has to be pre-declared. */
static void try_connect_peer(struct daemon *daemon,
const struct node_id *id,
struct wireaddr_internal *addrs TAKES,
bool dns_fallback);
struct wireaddr_internal *addrs TAKES);

/* We track peers which are important, and try to reconnect (with backoff) */
static void schedule_reconnect_if_important(struct daemon *daemon,
const struct node_id *id);

/*~ Some ISP resolvers will reply with a dummy IP to queries that would otherwise
* result in an NXDOMAIN reply. This just checks whether we have one such
* resolver upstream and remembers its reply so we can try to filter future
* dummies out.
*/
static bool broken_resolver(struct daemon *daemon)
{
struct addrinfo *addrinfo;
struct addrinfo hints;
const char *hostname = "nxdomain-test.doesntexist";
int err;

/* If they told us to never do DNS queries, don't even do this one and
* also not if we just say that we don't */
if (!daemon->use_dns || daemon->always_use_proxy) {
daemon->broken_resolver_response = NULL;
return false;
}

memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
hints.ai_protocol = 0;
hints.ai_flags = AI_ADDRCONFIG;
err = getaddrinfo(hostname, tal_fmt(tmpctx, "%d", 42),
&hints, &addrinfo);

/*~ Note the use of tal_dup here: it is a memdup for tal, but it's
* type-aware so it's less error-prone. */
if (err == 0) {
daemon->broken_resolver_response
= tal_dup(daemon, struct sockaddr, addrinfo->ai_addr);
freeaddrinfo(addrinfo);
} else
daemon->broken_resolver_response = NULL;

return daemon->broken_resolver_response != NULL;
}

/*~ Here we see our first tal destructor: in this case the 'struct connect'
* simply removes itself from the table of all 'connecting' structs. */
static void destroy_connecting(struct connecting *connect)
Expand Down Expand Up @@ -796,7 +756,8 @@ static void reconnect(struct important_id *imp)
/* Do gossmap lookup to find any addresses from there, and append. */
append_gossmap_addresses(&addrs, imp->daemon, &imp->id);

try_connect_peer(imp->daemon, &imp->id, take(addrs), false);
imp->reconnect_timer = NULL;
try_connect_peer(imp->daemon, &imp->id, take(addrs));
}

static void schedule_reconnect_if_important(struct daemon *daemon,
Expand All @@ -811,7 +772,7 @@ static void schedule_reconnect_if_important(struct daemon *daemon,
return;

/* Already on it? */
if (find_connecting(daemon, id))
if (find_connecting(daemon, id) || imp->reconnect_timer)
return;

/* --dev-no-reconnect? Don't reconnect. */
Expand All @@ -822,10 +783,10 @@ static void schedule_reconnect_if_important(struct daemon *daemon,
imp->reconnect_secs);
/* We fuzz the timer by up to 1 second, to avoid getting into
* simultanous-reconnect deadlocks with peer. */
notleak(new_reltimer(&daemon->timers, imp,
timerel_add(time_from_sec(imp->reconnect_secs),
time_from_usec(pseudorand(1000000))),
reconnect, imp));
imp->reconnect_timer = new_reltimer(&daemon->timers, imp,
timerel_add(time_from_sec(imp->reconnect_secs),
time_from_usec(pseudorand(1000000))),
reconnect, imp);

/* Back off next time if that fails */
imp->reconnect_secs *= 2;
Expand Down Expand Up @@ -1655,11 +1616,6 @@ static void connect_init(struct daemon *daemon, const u8 *msg)
} else
daemon->proxyaddr = NULL;

if (broken_resolver(daemon)) {
status_debug("Broken DNS resolver detected, will check for "
"dummy replies");
}

/* Figure out our addresses. */
daemon->listen_fds = setup_listeners(daemon, daemon,
proposed_wireaddr,
Expand Down Expand Up @@ -1797,39 +1753,6 @@ static const char **seednames(const tal_t *ctx, const struct node_id *id)
return seednames;
}

/*~ As a last resort, we do a DNS lookup to the lightning DNS seed to
* resolve a node name when they say to connect to it. This is synchronous,
* so connectd blocks, but it's not very common so we haven't fixed it.
*
* This "seed by DNS" approach is similar to what bitcoind uses, and in fact
* has the nice property that DNS is cached, and the seed only sees a request
* from the ISP, not directly from the user. */
static void add_seed_addrs(struct wireaddr_internal **addrs,
const struct node_id *id,
struct sockaddr *broken_reply)
{
struct wireaddr *new_addrs;
const char **hostnames = seednames(tmpctx, id);

for (size_t i = 0; i < tal_count(hostnames); i++) {
status_peer_debug(id, "Resolving %s", hostnames[i]);
new_addrs = wireaddr_from_hostname(tmpctx, hostnames[i], chainparams_get_ln_port(chainparams),
NULL, broken_reply, NULL);
if (new_addrs) {
for (size_t j = 0; j < tal_count(new_addrs); j++) {
if (new_addrs[j].type == ADDR_TYPE_DNS)
continue;
status_peer_debug(id, "Resolved %s to %s", hostnames[i],
fmt_wireaddr(tmpctx, &new_addrs[j]));
append_addr(addrs, &new_addrs[j]);
}
/* Other seeds will likely have the same information. */
return;
} else
status_peer_debug(id, "Could not resolve %s", hostnames[i]);
}
}

static bool addr_in(const struct wireaddr_internal *needle,
const struct wireaddr_internal haystack[])
{
Expand All @@ -1843,8 +1766,7 @@ static bool addr_in(const struct wireaddr_internal *needle,
/*~ Try to connect to a single peer, given some addresses (in order) */
static void try_connect_peer(struct daemon *daemon,
const struct node_id *id,
struct wireaddr_internal *addrs TAKES,
bool dns_fallback)
struct wireaddr_internal *addrs TAKES)
{
bool use_proxy = daemon->always_use_proxy;
struct connecting *connect;
Expand Down Expand Up @@ -1885,14 +1807,11 @@ static void try_connect_peer(struct daemon *daemon,
chainparams_get_ln_port(chainparams));
tal_arr_expand(&addrs, unresolved);
}
} else if (daemon->use_dns && dns_fallback) {
add_seed_addrs(&addrs, id,
daemon->broken_resolver_response);
}
}

/* Still no address? Fail immediately. Lightningd can still choose
* to retry; an address may get gossiped or appear on the DNS seed. */
/* Still no address? Fail immediately. Important ones get
* retried; an address may get gossiped. */
if (tal_count(addrs) == 0) {
connect_failed(daemon, id,
CONNECT_NO_KNOWN_ADDRESS,
Expand Down Expand Up @@ -1936,13 +1855,11 @@ static void connect_to_peer(struct daemon *daemon, const u8 *msg)
{
struct node_id id;
struct wireaddr_internal *addrs;
bool dns_fallback;
bool transient;
struct important_id *imp;

if (!fromwire_connectd_connect_to_peer(tmpctx, msg,
&id, &addrs,
&dns_fallback,
&transient))
master_badmsg(WIRE_CONNECTD_CONNECT_TO_PEER, msg);

Expand Down Expand Up @@ -1972,6 +1889,7 @@ static void connect_to_peer(struct daemon *daemon, const u8 *msg)
struct wireaddr_internal,
addrs);
imp->reconnect_secs = INITIAL_WAIT_SECONDS;
imp->reconnect_timer = NULL;
tal_add_destructor(imp, destroy_important_id);
important_id_htable_add(daemon->important_ids, imp);
} else
Expand All @@ -1981,7 +1899,7 @@ static void connect_to_peer(struct daemon *daemon, const u8 *msg)
/* Do gossmap lookup to find any addresses from there, and append. */
append_gossmap_addresses(&addrs, daemon, &id);

try_connect_peer(daemon, &id, addrs, dns_fallback);
try_connect_peer(daemon, &id, addrs);
}

/* lightningd tells us a peer should be disconnected. */
Expand Down
7 changes: 3 additions & 4 deletions connectd/connectd.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,10 @@ struct important_id {

struct node_id id;
struct wireaddr_internal *addrs;

/* Backoff timer (increases by 2 each time) */
size_t reconnect_secs;
struct oneshot *reconnect_timer;
};

static const struct node_id *important_id_keyof(const struct important_id *imp)
Expand Down Expand Up @@ -293,10 +296,6 @@ struct daemon {
* resort, but doing so leaks our address so can be disabled. */
bool use_dns;

/* The address that the broken response returns instead of
* NXDOMAIN. NULL if we have not detected a broken resolver. */
struct sockaddr *broken_resolver_response;

/* File descriptors to listen on once we're activated. */
const struct listen_fd **listen_fds;

Expand Down
1 change: 0 additions & 1 deletion connectd/connectd_wire.csv
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ msgtype,connectd_connect_to_peer,2001
msgdata,connectd_connect_to_peer,id,node_id,
msgdata,connectd_connect_to_peer,len,u32,
msgdata,connectd_connect_to_peer,addrs,wireaddr_internal,len
msgdata,connectd_connect_to_peer,dns_fallback,bool,
msgdata,connectd_connect_to_peer,transient,bool,

# Connectd->master: connect failed.
Expand Down
4 changes: 2 additions & 2 deletions lightningd/connect_control.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ static struct command_result *json_connect(struct command *cmd,
}

subd_send_msg(cmd->ld->connectd,
take(towire_connectd_connect_to_peer(NULL, &id_addr.id, addr, true, true)));
take(towire_connectd_connect_to_peer(NULL, &id_addr.id, addr, true)));

/* Leave this here for peer_connected, connect_failed or peer_disconnect_done. */
new_connect(cmd->ld, &id_addr.id, cmd);
Expand Down Expand Up @@ -436,7 +436,7 @@ void connectd_connect_to_peer(struct lightningd *ld,
}
subd_send_msg(peer->ld->connectd,
take(towire_connectd_connect_to_peer(NULL, &peer->id,
waddr, true,
waddr,
!is_important)));
}

Expand Down
1 change: 0 additions & 1 deletion lightningd/gossip_control.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ static void handle_connect_to_peer(struct subd *gossip, const u8 *msg)
connectmsg = towire_connectd_connect_to_peer(NULL,
&id,
NULL, //addrhint,
false, //dns_fallback
true); //transient
subd_send_msg(gossip->ld->connectd, take(connectmsg));
}
Expand Down
Loading