Attachment 'resolve.c'

/*  Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include <ctype.h>
#include <stdio.h>
#include <fcntl.h>
#include <assert.h>
#include <arpa/inet.h>
#include <libknot/rrtype/rdname.h>
#include <libknot/descriptor.h>
#include <ucw/mempool.h>
#include "lib/resolve.h"
#include "lib/layer.h"
#include "lib/rplan.h"
#include "lib/layer/iterate.h"
#include "lib/dnssec/ta.h"
#if defined(ENABLE_COOKIES)
#include "lib/cookies/control.h"
#include "lib/cookies/helper.h"
#include "lib/cookies/nonce.h"
#else /* Define compatibility macros */
#define KNOT_EDNS_OPTION_COOKIE 10
#endif /* defined(ENABLE_COOKIES) */

#define VERBOSE_MSG(qry, fmt...) QRVERBOSE((qry), "resl", fmt)
/** @internal Set the yielded flag on all RR entries with a matching @a qry_uid. */
static void set_yield(ranked_rr_array_t *array, const uint32_t qry_uid, const bool yielded)
{
	for (unsigned i = 0; i < array->len; ++i) {
		ranked_rr_array_entry_t *entry = array->at[i];
		if (entry->qry_uid == qry_uid) {
			entry->yielded = yielded;
		}
	}
}

/**
 * @internal Defer execution of the current query.
 * The current layer state and input will be pushed to a stack and resumed on the next iteration.
 */
static int consume_yield(kr_layer_t *ctx, knot_pkt_t *pkt)
{
	struct kr_request *req = ctx->req;
	knot_pkt_t *pkt_copy = knot_pkt_new(NULL, pkt->size, &req->pool);
	struct kr_layer_pickle *pickle = mm_alloc(&req->pool, sizeof(*pickle));
	if (pickle && pkt_copy && knot_pkt_copy(pkt_copy, pkt) == 0) {
		struct kr_query *qry = req->current_query;
		pickle->api = ctx->api;
		pickle->state = ctx->state;
		pickle->pkt = pkt_copy;
		pickle->next = qry->deferred;
		qry->deferred = pickle;
		set_yield(&req->answ_selected, qry->uid, true);
		set_yield(&req->auth_selected, qry->uid, true);
		return kr_ok();
	}
	return kr_error(ENOMEM);
}
static int begin_yield(kr_layer_t *ctx) { return kr_ok(); }
static int reset_yield(kr_layer_t *ctx) { return kr_ok(); }
static int finish_yield(kr_layer_t *ctx) { return kr_ok(); }
static int produce_yield(kr_layer_t *ctx, knot_pkt_t *pkt) { return kr_ok(); }

/** @internal Macro for iterating module layers. */
#define RESUME_LAYERS(from, r, qry, func, ...) \
	(r)->current_query = (qry); \
	for (size_t i = (from); i < (r)->ctx->modules->len; ++i) { \
		struct kr_module *mod = (r)->ctx->modules->at[i]; \
		if (mod->layer) { \
			struct kr_layer layer = {.state = (r)->state, .api = mod->layer(mod), .req = (r)}; \
			if (layer.api && layer.api->func) { \
				(r)->state = layer.api->func(&layer, ##__VA_ARGS__); \
				if ((r)->state == KR_STATE_YIELD) { \
					func ## _yield(&layer, ##__VA_ARGS__); \
					break; \
				} \
			} \
		} \
	} /* Invalidate current query. */ \
	(r)->current_query = NULL

/** @internal Macro for starting module iteration. */
#define ITERATE_LAYERS(req, qry, func, ...) RESUME_LAYERS(0, req, qry, func, ##__VA_ARGS__)

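/* An illustrative expansion sketch (not part of the original file): a call like
 *     ITERATE_LAYERS(req, qry, consume, pkt);
 * walks every loaded module, and for each module exporting a layer with a
 * .consume callback it effectively runs
 *     struct kr_layer layer = { .state = req->state, .api = mod->layer(mod), .req = req };
 *     req->state = layer.api->consume(&layer, pkt);
 * stopping early (and pickling the input via consume_yield) when a layer
 * returns KR_STATE_YIELD. RESUME_LAYERS(layer_id(...), ...) later restarts
 * the same walk from the module that yielded.
 */
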
/** @internal Find layer id matching API. */
static inline size_t layer_id(struct kr_request *req, const struct kr_layer_api *api) {
	module_array_t *modules = req->ctx->modules;
	for (size_t i = 0; i < modules->len; ++i) {
		struct kr_module *mod = modules->at[i];
		if (mod->layer && mod->layer(mod) == api) {
			return i;
		}
	}
	return 0; /* Not found, try all. */
}

/* @internal We don't need to deal with locale here.
 * ORing 0x20 folds 'A'-'Z' onto 'a'-'z'; unsigned wrap-around rejects everything else. */
KR_CONST static inline bool isletter(unsigned chr)
{ return (chr | 0x20 /* tolower */) - 'a' <= 'z' - 'a'; }

/* Randomize QNAME letter case.
 * This adds up to 32 bits of entropy, which exceeds the length of an average domain name.
 * https://tools.ietf.org/html/draft-vixie-dnsext-dns0x20-00
 */
static void randomized_qname_case(knot_dname_t * restrict qname, uint32_t secret)
{
	assert(qname);
	const int len = knot_dname_size(qname) - 2; /* Skip the first length byte and the root label. */
	for (int i = 0; i < len; ++i) {
		if (isletter(*++qname)) {
			*qname ^= ((secret >> (i & 31)) & 1) * 0x20;
		}
	}
}
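
/* Illustration (not part of the original file): bit i of `secret` decides the
 * case of the i-th scanned byte, so "example.com" might go out as
 * "eXaMpLe.cOm". Because flipping is a plain XOR with 0x20, applying
 * randomized_qname_case() a second time with the same secret restores the
 * original wire name - which is exactly how kr_resolve_consume()
 * de-randomizes the QNAME of the incoming answer. */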

/** Invalidate current NS/addr pair. */
static int invalidate_ns(struct kr_rplan *rplan, struct kr_query *qry)
{
	if (qry->ns.addr[0].ip.sa_family != AF_UNSPEC) {
		uint8_t *addr = kr_nsrep_inaddr(qry->ns.addr[0]);
		size_t addr_len = kr_nsrep_inaddr_len(qry->ns.addr[0]);
		/* @warning _NOT_ thread-safe */
		static knot_rdata_t rdata_arr[RDATA_ARR_MAX];
		knot_rdata_init(rdata_arr, addr_len, addr, 0);
		return kr_zonecut_del(&qry->zone_cut, qry->ns.name, rdata_arr);
	} else {
		return kr_zonecut_del_all(&qry->zone_cut, qry->ns.name);
	}
}

/** This turns off QNAME minimisation if there is a non-terminal between the current zone cut and the target name.
 *  It saves several minimisation steps, as the zone cut is likely the final one.
 */
static void check_empty_nonterms(struct kr_query *qry, knot_pkt_t *pkt, struct kr_cache *cache, uint32_t timestamp)
{
	if (qry->flags & QUERY_NO_MINIMIZE) {
		return;
	}

	const knot_dname_t *target = qry->sname;
	const knot_dname_t *cut_name = qry->zone_cut.name;
	if (!target || !cut_name)
		return;

	struct kr_cache_entry *entry = NULL;
	/* @note: The non-terminal must be a direct child of the zone cut (i.e. label distance <= 2),
	 *        otherwise this would risk leaking information to the parent if the NODATA TTD > zone cut TTD. */
	int labels = knot_dname_labels(target, NULL) - knot_dname_labels(cut_name, NULL);
	while (target[0] && labels > 2) {
		target = knot_wire_next_label(target, NULL);
		--labels;
	}
	for (int i = 0; i < labels; ++i) {
		int ret = kr_cache_peek(cache, KR_CACHE_PKT, target, KNOT_RRTYPE_NS, &entry, &timestamp);
		if (ret == 0) { /* Either NXDOMAIN or NODATA, start here. */
			/* @todo We could stop resolution here for NXDOMAIN, but we can't because of broken CDNs. */
			qry->flags |= QUERY_NO_MINIMIZE;
			kr_make_query(qry, pkt);
			return;
		}
		assert(target[0]);
		target = knot_wire_next_label(target, NULL);
	}
}

static int ns_fetch_cut(struct kr_query *qry, const knot_dname_t *requested_name,
			struct kr_request *req, knot_pkt_t *pkt)
{
	map_t *trust_anchors = &req->ctx->trust_anchors;
	map_t *negative_anchors = &req->ctx->negative_anchors;

	/* It can happen that the parent query already has a provably
	 * insecure zone cut which is not in the cache yet. */
	const uint32_t insec_flags = QUERY_DNSSEC_INSECURE | QUERY_DNSSEC_NODS;
	const uint32_t cut_flags = QUERY_AWAIT_IPV4 | QUERY_AWAIT_IPV6;
	const bool is_insecured = ((qry->parent != NULL) &&
				   (qry->parent->flags & cut_flags) == 0 &&
				   (qry->parent->flags & insec_flags) != 0);

	/* Want DNSSEC if it's possible to secure this name
	 * (i.e. it is covered by a TA). */
	if (is_insecured) {
		/* If the parent is insecure, we don't want DNSSEC
		 * even if the cut name is covered by a TA. */
		qry->flags &= ~QUERY_DNSSEC_WANT;
		qry->flags |= QUERY_DNSSEC_INSECURE;
	} else if (!kr_ta_covers(negative_anchors, qry->zone_cut.name) &&
		   kr_ta_covers(trust_anchors, qry->zone_cut.name)) {
		qry->flags |= QUERY_DNSSEC_WANT;
	} else {
		qry->flags &= ~QUERY_DNSSEC_WANT;
	}

	struct kr_zonecut cut_found = {0};
	kr_zonecut_init(&cut_found, requested_name, req->rplan.pool);
	/* The cut that is found can differ from the cut that was requested,
	 * so unless we are already insecure,
	 * try to fetch the TA & keys even if the initial cut name is not covered by a TA. */
	bool secured = !is_insecured;
	int ret = kr_zonecut_find_cached(req->ctx, &cut_found, requested_name,
					 qry->timestamp.tv_sec, &secured);
	if (ret == kr_error(ENOENT)) {
		/* No cached cut found, start from SBELT
		 * and issue a priming query. */
		ret = kr_zonecut_set_sbelt(req->ctx, &qry->zone_cut);
		if (ret != 0) {
			return KR_STATE_FAIL;
		}
		VERBOSE_MSG(qry, "=> using root hints\n");
		qry->flags &= ~QUERY_AWAIT_CUT;
		kr_zonecut_deinit(&cut_found);
		return KR_STATE_DONE;
	} else if (ret != kr_ok()) {
		kr_zonecut_deinit(&cut_found);
		return KR_STATE_FAIL;
	}

	/* Find out the security status.
	 * Go insecure if the zone cut is provably insecure. */
	if ((qry->flags & QUERY_DNSSEC_WANT) && !secured) {
		VERBOSE_MSG(qry, "=> NS is provably without DS, going insecure\n");
		qry->flags &= ~QUERY_DNSSEC_WANT;
		qry->flags |= QUERY_DNSSEC_INSECURE;
	}
	/* The zone cut name can change, so check it again
	 * to prevent unnecessary DS & DNSKEY queries. */
	if (!(qry->flags & QUERY_DNSSEC_INSECURE) &&
	    !kr_ta_covers(negative_anchors, cut_found.name) &&
	    kr_ta_covers(trust_anchors, cut_found.name)) {
		qry->flags |= QUERY_DNSSEC_WANT;
	} else {
		qry->flags &= ~QUERY_DNSSEC_WANT;
	}
	/* Check if any DNSKEY was found for the cached cut. */
	if ((qry->flags & QUERY_DNSSEC_WANT) &&
	    (cut_found.key == NULL)) {
		/* No DNSKEY was found for the cached cut.
		 * If no glue was fetched for this cut,
		 * we have a circular dependency - we must fetch A/AAAA
		 * from an authoritative server, but we have no key to verify it.
		 * TODO - try to refetch the cut only if no glue was fetched. */
		kr_zonecut_deinit(&cut_found);
		if (requested_name[0] != '\0') {
			/* If not root - try the next label. */
			return KR_STATE_CONSUME;
		}
		/* No cached cut & keys found, start from SBELT. */
		ret = kr_zonecut_set_sbelt(req->ctx, &qry->zone_cut);
		if (ret != 0) {
			return KR_STATE_FAIL;
		}
		VERBOSE_MSG(qry, "=> using root hints\n");
		qry->flags &= ~QUERY_AWAIT_CUT;
		return KR_STATE_DONE;
	}
	/* Copy the fetched name. */
	qry->zone_cut.name = knot_dname_copy(cut_found.name, qry->zone_cut.pool);
	/* Copy the fetched address set. */
	kr_zonecut_copy(&qry->zone_cut, &cut_found);
	/* Copy the fetched TA & keys. */
	kr_zonecut_copy_trust(&qry->zone_cut, &cut_found);
	/* Check if there's a non-terminal between the target and the current cut. */
	struct kr_cache *cache = &req->ctx->cache;
	check_empty_nonterms(qry, pkt, cache, qry->timestamp.tv_sec);
	/* Cut found. */
	return KR_STATE_PRODUCE;
}

static int ns_resolve_addr(struct kr_query *qry, struct kr_request *param)
{
	struct kr_rplan *rplan = &param->rplan;
	struct kr_context *ctx = param->ctx;

	/* Start NS queries from the root, to avoid certain cases
	 * where an NS drops out of the cache and the rest are unavailable,
	 * which would lead to a dependency loop in the current zone cut.
	 * Prefer IPv6 and continue with IPv4 if not available.
	 */
	uint16_t next_type = 0;
	if (!(qry->flags & QUERY_AWAIT_IPV6) &&
	    !(ctx->options & QUERY_NO_IPV6)) {
		next_type = KNOT_RRTYPE_AAAA;
		qry->flags |= QUERY_AWAIT_IPV6;
	} else if (!(qry->flags & QUERY_AWAIT_IPV4) &&
		   !(ctx->options & QUERY_NO_IPV4)) {
		next_type = KNOT_RRTYPE_A;
		qry->flags |= QUERY_AWAIT_IPV4;
		/* Hmm, no usable IPv6 then. */
		qry->ns.reputation |= KR_NS_NOIP6;
		kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
	}
	/* Bail out if the query is already pending or forms a dependency loop. */
	if (!next_type || kr_rplan_satisfies(qry->parent, qry->ns.name, KNOT_CLASS_IN, next_type)) {
		/* Fall back to SBELT if a root server query fails. */
		if (!next_type && qry->zone_cut.name[0] == '\0') {
			VERBOSE_MSG(qry, "=> fallback to root hints\n");
			kr_zonecut_set_sbelt(ctx, &qry->zone_cut);
			qry->flags |= QUERY_NO_THROTTLE; /* Pick even bad SBELT servers. */
			return kr_error(EAGAIN);
		}
		/* Neither IPv4 nor IPv6 available, flag the server as unusable. */
		VERBOSE_MSG(qry, "=> unresolvable NS address, bailing out\n");
		qry->ns.reputation |= KR_NS_NOIP4 | KR_NS_NOIP6;
		kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
		invalidate_ns(rplan, qry);
		return kr_error(EHOSTUNREACH);
	}
	struct kr_query *next = qry;
	if (knot_dname_is_equal(qry->ns.name, qry->sname) &&
	    qry->stype == next_type) {
		if (!(qry->flags & QUERY_NO_MINIMIZE)) {
			qry->flags |= QUERY_NO_MINIMIZE;
			qry->flags &= ~QUERY_AWAIT_IPV6;
			qry->flags &= ~QUERY_AWAIT_IPV4;
			VERBOSE_MSG(qry, "=> circular dependency, retrying with non-minimized name\n");
		} else {
			qry->ns.reputation |= KR_NS_NOIP4 | KR_NS_NOIP6;
			kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
			invalidate_ns(rplan, qry);
			VERBOSE_MSG(qry, "=> unresolvable NS address, bailing out\n");
			return kr_error(EHOSTUNREACH);
		}
	} else {
		/* Push a new query to the resolution plan. */
		next = kr_rplan_push(rplan, qry, qry->ns.name, KNOT_CLASS_IN, next_type);
		if (!next) {
			return kr_error(ENOMEM);
		}
	}
	/* At the root level with no NS addresses, add an SBELT subrequest. */
	int ret = 0;
	if (qry->zone_cut.name[0] == '\0') {
		ret = kr_zonecut_set_sbelt(ctx, &next->zone_cut);
		if (ret == 0) { /* Copy TA and keys since it's the same cut, to avoid a lookup. */
			kr_zonecut_copy_trust(&next->zone_cut, &qry->zone_cut);
			kr_zonecut_set_sbelt(ctx, &qry->zone_cut); /* Add SBELT to the parent in case the query fails. */
			qry->flags |= QUERY_NO_THROTTLE; /* Pick even bad SBELT servers. */
		}
	} else {
		next->flags |= QUERY_AWAIT_CUT;
	}
	return ret;
}

static int edns_put(knot_pkt_t *pkt)
{
	if (!pkt->opt_rr) {
		return kr_ok();
	}
	/* Reclaim reserved size. */
	int ret = knot_pkt_reclaim(pkt, knot_edns_wire_size(pkt->opt_rr));
	if (ret != 0) {
		return ret;
	}
	/* Write to packet. */
	assert(pkt->current == KNOT_ADDITIONAL);
	return knot_pkt_put(pkt, KNOT_COMPR_HINT_NONE, pkt->opt_rr, KNOT_PF_FREE);
}

/** Removes the last EDNS OPT RR written to the packet and reserves its space. */
static int edns_erase_and_reserve(knot_pkt_t *pkt)
{
	/* Nothing to be done. */
	if (!pkt || !pkt->opt_rr) {
		return 0;
	}

	/* Fail if the data is located elsewhere than at the end of the packet. */
	if (pkt->current != KNOT_ADDITIONAL ||
	    pkt->opt_rr != &pkt->rr[pkt->rrset_count - 1]) {
		return -1;
	}

	size_t len = knot_rrset_size(pkt->opt_rr);
	int16_t rr_removed = pkt->opt_rr->rrs.rr_count;
	/* Decrease rrset counters. */
	pkt->rrset_count -= 1;
	pkt->sections[pkt->current].count -= 1;
	pkt->size -= len;
	knot_wire_add_arcount(pkt->wire, -rr_removed); /* ADDITIONAL */

	pkt->opt_rr = NULL;

	/* Reserve the freed space. */
	return knot_pkt_reserve(pkt, len);
}

static int edns_create(knot_pkt_t *pkt, knot_pkt_t *template, struct kr_request *req)
{
	pkt->opt_rr = knot_rrset_copy(req->ctx->opt_rr, &pkt->mm);
	size_t wire_size = knot_edns_wire_size(pkt->opt_rr);
#if defined(ENABLE_COOKIES)
	if (req->ctx->cookie_ctx.clnt.enabled ||
	    req->ctx->cookie_ctx.srvr.enabled) {
		wire_size += KR_COOKIE_OPT_MAX_LEN;
	}
#endif /* defined(ENABLE_COOKIES) */
	if (req->has_tls && req->ctx->tls_padding >= 2) {
		wire_size += KNOT_EDNS_OPTION_HDRLEN + req->ctx->tls_padding;
	}
	return knot_pkt_reserve(pkt, wire_size);
}

static int answer_prepare(knot_pkt_t *answer, knot_pkt_t *query, struct kr_request *req)
{
	if (knot_pkt_init_response(answer, query) != 0) {
		return kr_error(ENOMEM); /* Failed to initialize answer. */
	}
	/* Handle EDNS in the query. */
	if (knot_pkt_has_edns(query)) {
		int ret = edns_create(answer, query, req);
		if (ret != 0) {
			return ret;
		}
		/* Set DO bit if set (DNSSEC requested). */
		if (knot_pkt_has_dnssec(query)) {
			knot_edns_set_do(answer->opt_rr);
		}
	}
	return kr_ok();
}

static void write_extra_records(rr_array_t *arr, knot_pkt_t *answer)
{
	for (size_t i = 0; i < arr->len; ++i) {
		knot_pkt_put(answer, 0, arr->at[i], 0);
	}
}

static void write_extra_ranked_records(ranked_rr_array_t *arr, knot_pkt_t *answer)
{
	for (size_t i = 0; i < arr->len; ++i) {
		ranked_rr_array_entry_t *entry = arr->at[i];
		if (!entry->to_wire) {
			continue;
		}
		knot_rrset_t *rr = entry->rr;
		if (!knot_pkt_has_dnssec(answer)) {
			if (rr->type != knot_pkt_qtype(answer) && knot_rrtype_is_dnssec(rr->type)) {
				continue;
			}
		}
		knot_pkt_put(answer, 0, rr, 0);
	}
}

/** @internal Add an EDNS padding RR into the answer if requested and required. */
static int answer_padding(struct kr_request *request)
{
	if (!request || !request->answer || !request->ctx) {
		assert(false);
		return kr_error(EINVAL);
	}
	uint16_t padding = request->ctx->tls_padding;
	knot_pkt_t *answer = request->answer;
	knot_rrset_t *opt_rr = answer->opt_rr;

	if (padding < 2) {
		return kr_ok();
	}
	int32_t max_pad_bytes = knot_edns_get_payload(opt_rr) - (answer->size + knot_rrset_size(opt_rr));

	int32_t pad_bytes = MIN(knot_edns_alignment_size(answer->size, knot_rrset_size(opt_rr), padding),
				max_pad_bytes);

	if (pad_bytes >= 0) {
		uint8_t zeros[MAX(1, pad_bytes)];
		memset(zeros, 0, sizeof(zeros));
		int r = knot_edns_add_option(opt_rr, KNOT_EDNS_OPTION_PADDING,
					     pad_bytes, zeros, &answer->mm);
		if (r != KNOT_EOK) {
			knot_rrset_clear(opt_rr, &answer->mm);
			return kr_error(r);
		}
	}
	return kr_ok();
}
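
/* Illustration (not part of the original file): the exact arithmetic lives in
 * knot_edns_alignment_size(), but the intent is block padding per RFC 7830
 * (the EDNS(0) Padding option). With, say, tls_padding = 512, a ~150-byte
 * answer gets enough zero bytes appended for the final message to reach the
 * next multiple of 512; pad_bytes is clamped by max_pad_bytes so the padded
 * answer never exceeds the advertised EDNS payload size. */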

static int answer_fail(struct kr_request *request)
{
	knot_pkt_t *answer = request->answer;
	int ret = kr_pkt_clear_payload(answer);
	knot_wire_clear_ad(answer->wire);
	knot_wire_clear_aa(answer->wire);
	knot_wire_set_rcode(answer->wire, KNOT_RCODE_SERVFAIL);
	if (ret == 0 && answer->opt_rr) {
		/* OPT in a SERVFAIL response is still useful for cookies/additional info. */
		knot_pkt_begin(answer, KNOT_ADDITIONAL);
		answer_padding(request); /* Ignore failed padding in SERVFAIL answer. */
		ret = edns_put(answer);
	}
	return ret;
}

static int answer_finalize(struct kr_request *request, int state)
{
	struct kr_rplan *rplan = &request->rplan;
	knot_pkt_t *answer = request->answer;

	/* Always set SERVFAIL for bogus answers. */
	if (state == KR_STATE_FAIL && rplan->pending.len > 0) {
		struct kr_query *last = array_tail(rplan->pending);
		if ((last->flags & QUERY_DNSSEC_WANT) && (last->flags & QUERY_DNSSEC_BOGUS)) {
			return answer_fail(request);
		}
	}

	if (request->answ_selected.len > 0) {
		assert(answer->current <= KNOT_ANSWER);
		/* Write answer records. */
		if (answer->current < KNOT_ANSWER) {
			knot_pkt_begin(answer, KNOT_ANSWER);
		}
		write_extra_ranked_records(&request->answ_selected, answer);
	}

	/* Write authority records. */
	if (answer->current < KNOT_AUTHORITY) {
		knot_pkt_begin(answer, KNOT_AUTHORITY);
	}
	write_extra_ranked_records(&request->auth_selected, answer);
	/* Write additional records. */
	knot_pkt_begin(answer, KNOT_ADDITIONAL);
	write_extra_records(&request->additional, answer);
	/* Write EDNS information. */
	int ret = 0;
	if (answer->opt_rr) {
		if (request->has_tls) {
			if (answer_padding(request) != kr_ok()) {
				return answer_fail(request);
			}
		}
		knot_pkt_begin(answer, KNOT_ADDITIONAL);
		ret = edns_put(answer);
	}

	/* Set AD=1 if we succeeded and the client requested a secured answer. */
	const bool has_ad = knot_wire_get_ad(answer->wire);
	knot_wire_clear_ad(answer->wire);
	if (state == KR_STATE_DONE && rplan->resolved.len > 0) {
		struct kr_query *last = array_tail(rplan->resolved);
		/* Do not set AD for an RRSIG query, as we can't validate it. */
		const bool secure = (last->flags & QUERY_DNSSEC_WANT) &&
				    !(last->flags & QUERY_DNSSEC_INSECURE) &&
				    !(last->flags & QUERY_DNSSEC_OPTOUT);
		if (!(last->flags & QUERY_STUB) /* Never set AD if forwarding. */
		    && has_ad && secure
		    && knot_pkt_qtype(answer) != KNOT_RRTYPE_RRSIG) {
			knot_wire_set_ad(answer->wire);
		}
	}

	return ret;
}

static int query_finalize(struct kr_request *request, struct kr_query *qry, knot_pkt_t *pkt)
{
	int ret = 0;
	knot_pkt_begin(pkt, KNOT_ADDITIONAL);
	if (!(qry->flags & QUERY_SAFEMODE)) {
		/* Remove any EDNS records from any previous iteration. */
		ret = edns_erase_and_reserve(pkt);
		if (ret == 0) {
			ret = edns_create(pkt, request->answer, request);
		}
		if (ret == 0) {
			/* Stub resolution (ask for +rd and +do). */
			if (qry->flags & QUERY_STUB) {
				knot_wire_set_rd(pkt->wire);
				if (knot_pkt_has_dnssec(request->answer)) {
					knot_edns_set_do(pkt->opt_rr);
				}
				if (knot_wire_get_cd(request->answer->wire)) {
					knot_wire_set_cd(pkt->wire);
				}
			/* Full resolution (ask for +cd and +do). */
			} else if (qry->flags & QUERY_DNSSEC_WANT) {
				knot_edns_set_do(pkt->opt_rr);
				knot_wire_set_cd(pkt->wire);
			}
			ret = edns_put(pkt);
		}
	}
	return ret;
}

int kr_resolve_begin(struct kr_request *request, struct kr_context *ctx, knot_pkt_t *answer)
{
	/* Initialize request. */
	request->ctx = ctx;
	request->answer = answer;
	request->options = ctx->options;
	request->state = KR_STATE_CONSUME;
	request->current_query = NULL;
	array_init(request->additional);
	array_init(request->answ_selected);
	array_init(request->auth_selected);
	request->answ_validated = false;
	request->auth_validated = false;

	/* Expect first query. */
	kr_rplan_init(&request->rplan, request, &request->pool);
	return KR_STATE_CONSUME;
}
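
/* The sketch below (not part of the original file) shows how a caller could
 * drive the kr_resolve_* state machine implemented in this file. The helpers
 * make_packet() and do_exchange() are hypothetical placeholders; a real
 * daemon performs the I/O asynchronously. */
#if 0
static void drive(struct kr_context *ctx, knot_pkt_t *query, knot_pkt_t *answer)
{
	struct kr_request req = { 0 };	/* .pool must be set up by the caller. */
	int state = kr_resolve_begin(&req, ctx, answer);
	state = kr_resolve_consume(&req, NULL, query);	/* Feed in the client query. */
	while (state == KR_STATE_PRODUCE) {
		struct sockaddr *dst = NULL;
		int sock_type = -1;
		knot_pkt_t *pkt = make_packet();	/* Hypothetical allocation helper. */
		state = kr_resolve_produce(&req, &dst, &sock_type, pkt);
		if (state != KR_STATE_CONSUME) {
			continue;	/* PRODUCE loops again; DONE/FAIL ends the loop. */
		}
		/* Finalize EDNS, 0x20 randomization and cookies, then do the I/O. */
		kr_resolve_checkout(&req, NULL, dst, sock_type, pkt);
		knot_pkt_t *reply = do_exchange(dst, sock_type, pkt);	/* Hypothetical I/O. */
		state = kr_resolve_consume(&req, dst, reply);
	}
	kr_resolve_finish(&req, state);
}
#endif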

static int resolve_query(struct kr_request *request, const knot_pkt_t *packet)
{
	struct kr_rplan *rplan = &request->rplan;
	const knot_dname_t *qname = knot_pkt_qname(packet);
	uint16_t qclass = knot_pkt_qclass(packet);
	uint16_t qtype = knot_pkt_qtype(packet);
	bool cd_is_set = knot_wire_get_cd(packet->wire);
	struct kr_query *qry = NULL;

	if (qname != NULL) {
		qry = kr_rplan_push(rplan, NULL, qname, qclass, qtype);
	} else if (knot_wire_get_qdcount(packet->wire) == 0 &&
		   knot_pkt_has_edns(packet) &&
		   knot_edns_has_option(packet->opt_rr, KNOT_EDNS_OPTION_COOKIE)) {
		/* Plan an empty query only for cookies. */
		qry = kr_rplan_push_empty(rplan, NULL);
	}
	if (!qry) {
		return KR_STATE_FAIL;
	}

	/* Deferred zone cut lookup for this query. */
	qry->flags |= QUERY_AWAIT_CUT;
	/* Want DNSSEC if it's possible to secure this name (i.e. it is covered by a TA). */
	map_t *negative_anchors = &request->ctx->negative_anchors;
	map_t *trust_anchors = &request->ctx->trust_anchors;
	if ((knot_wire_get_ad(packet->wire) || knot_pkt_has_dnssec(packet)) &&
	    kr_ta_covers(trust_anchors, qname) && !kr_ta_covers(negative_anchors, qname)) {
		qry->flags |= QUERY_DNSSEC_WANT;
	}

	/* Initialize answer packet. */
	knot_pkt_t *answer = request->answer;
	knot_wire_set_qr(answer->wire);
	knot_wire_clear_aa(answer->wire);
	knot_wire_set_ra(answer->wire);
	knot_wire_set_rcode(answer->wire, KNOT_RCODE_NOERROR);

	if (cd_is_set) {
		knot_wire_set_cd(answer->wire);
	} else if (qry->flags & QUERY_DNSSEC_WANT) {
		knot_wire_set_ad(answer->wire);
	}

	/* Expect an answer, pop if satisfied immediately. */
	request->qsource.packet = packet;
	ITERATE_LAYERS(request, qry, begin);
	request->qsource.packet = NULL;
	if (request->state == KR_STATE_DONE) {
		kr_rplan_pop(rplan, qry);
	}
	return request->state;
}

KR_PURE static bool kr_inaddr_equal(const struct sockaddr *a, const struct sockaddr *b)
{
	const int a_len = kr_inaddr_len(a);
	const int b_len = kr_inaddr_len(b);
	return a_len == b_len && memcmp(kr_inaddr(a), kr_inaddr(b), a_len) == 0;
}

static void update_nslist_rtt(struct kr_context *ctx, struct kr_query *qry, const struct sockaddr *src)
{
	/* Do not track in safe mode. */
	if (qry->flags & QUERY_SAFEMODE) {
		return;
	}

	/* Calculate total resolution time from the time the query was generated. */
	struct timeval now;
	gettimeofday(&now, NULL);
	unsigned elapsed = time_diff(&qry->timestamp, &now);

	/* NSs in the preference list prior to the one that responded will be penalised
	 * with the RETRY timer interval. This is because we know they didn't respond
	 * for N retries, so their RTT must be at least N * RETRY.
	 * The NS in the preference list that responded will have its RTT relative to the
	 * time when the query was sent out, not when it originated.
	 */
	for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
		const struct sockaddr *addr = &qry->ns.addr[i].ip;
		if (addr->sa_family == AF_UNSPEC) {
			break;
		}
		/* If this address is the source of the answer, update its RTT. */
		if (kr_inaddr_equal(src, addr)) {
			kr_nsrep_update_rtt(&qry->ns, addr, elapsed, ctx->cache_rtt, KR_NS_UPDATE);
			WITH_VERBOSE {
				char addr_str[INET6_ADDRSTRLEN];
				inet_ntop(addr->sa_family, kr_inaddr(addr), addr_str, sizeof(addr_str));
				VERBOSE_MSG(qry, "<= server: '%s' rtt: %u ms\n", addr_str, elapsed);
			}
		} else {
			/* The response didn't come from this IP, but we know the RTT must be at least
			 * several RETRY timer tries, e.g. if we have addresses [a, b, c] and we have
			 * tried [a, b] when the answer from 'a' came after 350ms, then we know
			 * that 'b' didn't respond for at least 350 - (1 * 300) ms. We can't say that
			 * its RTT is 50ms, but we can say that its score shouldn't be less than 50. */
			kr_nsrep_update_rtt(&qry->ns, addr, elapsed, ctx->cache_rtt, KR_NS_MAX);
			WITH_VERBOSE {
				char addr_str[INET6_ADDRSTRLEN];
				inet_ntop(addr->sa_family, kr_inaddr(addr), addr_str, sizeof(addr_str));
				VERBOSE_MSG(qry, "<= server: '%s' rtt: >=%u ms\n", addr_str, elapsed);
			}
		}
		/* Subtract the RETRY interval before scoring the next server in the list. */
		if (elapsed < KR_CONN_RETRY) {
			break;
		}
		elapsed = elapsed - KR_CONN_RETRY;
	}
}
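
/* A worked trace (illustrative, not in the original file; assumes a
 * KR_CONN_RETRY of 300 ms): with the preference list [a, b, c] and the answer
 * arriving from 'c' after 750 ms, the loop records a score of >= 750 ms for
 * 'a', >= 450 ms for 'b', and a measured RTT of 150 ms (750 - 2 * 300) for
 * 'c'. */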

static void update_nslist_score(struct kr_request *request, struct kr_query *qry, const struct sockaddr *src, knot_pkt_t *packet)
{
	struct kr_context *ctx = request->ctx;
	/* On a successful answer, update the preference list RTTs and penalise timeouts. */
	if (request->state != KR_STATE_FAIL) {
		/* Update RTT information for the preference list. */
		update_nslist_rtt(ctx, qry, src);
		/* Do not complete NS address resolution on soft-fail. */
		const int rcode = packet ? knot_wire_get_rcode(packet->wire) : 0;
		if (rcode != KNOT_RCODE_SERVFAIL && rcode != KNOT_RCODE_REFUSED) {
			qry->flags &= ~(QUERY_AWAIT_IPV6|QUERY_AWAIT_IPV4);
		} else { /* Penalize SERVFAILs. */
			kr_nsrep_update_rtt(&qry->ns, src, KR_NS_PENALTY, ctx->cache_rtt, KR_NS_ADD);
		}
	/* Penalise resolution failures except validation failures. */
	} else if (!(qry->flags & QUERY_DNSSEC_BOGUS)) {
		kr_nsrep_update_rtt(&qry->ns, src, KR_NS_TIMEOUT, ctx->cache_rtt, KR_NS_RESET);
		WITH_VERBOSE {
			char addr_str[INET6_ADDRSTRLEN];
			inet_ntop(src->sa_family, kr_inaddr(src), addr_str, sizeof(addr_str));
			VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str);
		}
	}
}

int kr_resolve_consume(struct kr_request *request, const struct sockaddr *src, knot_pkt_t *packet)
{
	struct kr_rplan *rplan = &request->rplan;

	/* Empty resolution plan, push the packet as the new query. */
	if (packet && kr_rplan_empty(rplan)) {
		if (answer_prepare(request->answer, packet, request) != 0) {
			return KR_STATE_FAIL;
		}
		return resolve_query(request, packet);
	}

	/* Different processing for network errors. */
	struct kr_query *qry = array_tail(rplan->pending);
	bool tried_tcp = (qry->flags & QUERY_TCP);
	if (!packet || packet->size == 0) {
		if (tried_tcp) {
			request->state = KR_STATE_FAIL;
		} else {
			qry->flags |= QUERY_TCP;
		}
	} else {
		/* De-randomize the QNAME case in the received packet. */
		knot_dname_t *qname_raw = (knot_dname_t *)knot_pkt_qname(packet);
		if (qname_raw && qry->secret != 0) {
			randomized_qname_case(qname_raw, qry->secret);
		}
		request->state = KR_STATE_CONSUME;
		if (qry->flags & QUERY_CACHED) {
			ITERATE_LAYERS(request, qry, consume, packet);
		} else {
			struct timeval now;
			gettimeofday(&now, NULL);
			/* Fill in source and latency information. */
			request->upstream.rtt = time_diff(&qry->timestamp, &now);
			request->upstream.addr = src;
			ITERATE_LAYERS(request, qry, consume, packet);
			/* Clear temporary information. */
			request->upstream.addr = NULL;
			request->upstream.rtt = 0;
		}
	}

	/* Track RTT for iterative answers. */
	if (src && !(qry->flags & QUERY_CACHED)) {
		update_nslist_score(request, qry, src, packet);
	}
	/* Resolution failed, invalidate the current NS. */
	if (request->state == KR_STATE_FAIL) {
		invalidate_ns(rplan, qry);
		qry->flags &= ~QUERY_RESOLVED;
	}

	/* Pop the query if resolved. */
	if (request->state == KR_STATE_YIELD) {
		return KR_STATE_PRODUCE; /* Requery */
	} else if (qry->flags & QUERY_RESOLVED) {
		kr_rplan_pop(rplan, qry);
	} else if (!tried_tcp && (qry->flags & QUERY_TCP)) {
		return KR_STATE_PRODUCE; /* Requery over TCP */
	} else { /* Clear query flags for the next attempt. */
		qry->flags &= ~(QUERY_CACHED|QUERY_TCP);
	}

	ITERATE_LAYERS(request, qry, reset);

	/* Do not finish with a bogus answer. */
	if (qry->flags & QUERY_DNSSEC_BOGUS) {
		return KR_STATE_FAIL;
	}

	return kr_rplan_empty(&request->rplan) ? KR_STATE_DONE : KR_STATE_PRODUCE;
}

/** @internal Spawn subrequest in current zone cut (no minimization or lookup). */
static struct kr_query *zone_cut_subreq(struct kr_rplan *rplan, struct kr_query *parent,
                           const knot_dname_t *qname, uint16_t qtype)
{
	struct kr_query *next = kr_rplan_push(rplan, parent, qname, parent->sclass, qtype);
	if (!next) {
		return NULL;
	}
	kr_zonecut_set(&next->zone_cut, parent->zone_cut.name);
	if (kr_zonecut_copy(&next->zone_cut, &parent->zone_cut) != 0 ||
	    kr_zonecut_copy_trust(&next->zone_cut, &parent->zone_cut) != 0) {
		return NULL;
	}
	next->flags |= QUERY_NO_MINIMIZE;
	if (parent->flags & QUERY_DNSSEC_WANT) {
		next->flags |= QUERY_DNSSEC_WANT;
	}
	return next;
}

/* @todo: Validator refactoring, keep this in the driver for now. */
static int trust_chain_check(struct kr_request *request, struct kr_query *qry)
{
	struct kr_rplan *rplan = &request->rplan;
	map_t *trust_anchors = &request->ctx->trust_anchors;
	map_t *negative_anchors = &request->ctx->negative_anchors;

	/* Disable DNSSEC if entering an NTA (negative trust anchor). */
	if (kr_ta_get(negative_anchors, qry->zone_cut.name)) {
		VERBOSE_MSG(qry, ">< negative TA, going insecure\n");
		qry->flags &= ~QUERY_DNSSEC_WANT;
		qry->flags |= QUERY_DNSSEC_INSECURE;
	}
	if (qry->flags & QUERY_DNSSEC_NODS) {
		/* This is the next query iteration with a minimized qname.
		 * In the previous iteration, DS non-existence was proven. */
		qry->flags &= ~QUERY_DNSSEC_NODS;
		qry->flags &= ~QUERY_DNSSEC_WANT;
		qry->flags |= QUERY_DNSSEC_INSECURE;
	}
	/* Enable DNSSEC if entering a new (or different) island of trust,
	 * and update the TA RRset if required. */
	bool want_secured = (qry->flags & QUERY_DNSSEC_WANT) &&
			    !knot_wire_get_cd(request->answer->wire);
	knot_rrset_t *ta_rr = kr_ta_get(trust_anchors, qry->zone_cut.name);
	if (!knot_wire_get_cd(request->answer->wire) && ta_rr) {
		qry->flags |= QUERY_DNSSEC_WANT;
		want_secured = true;

		if (qry->zone_cut.trust_anchor == NULL
		    || !knot_dname_is_equal(qry->zone_cut.trust_anchor->owner, qry->zone_cut.name)) {
			mm_free(qry->zone_cut.pool, qry->zone_cut.trust_anchor);
			qry->zone_cut.trust_anchor = knot_rrset_copy(ta_rr, qry->zone_cut.pool);

			WITH_VERBOSE {
				char qname_str[KNOT_DNAME_MAXLEN];
				knot_dname_to_str(qname_str, ta_rr->owner, sizeof(qname_str));
				VERBOSE_MSG(qry, ">< TA: '%s'\n", qname_str);
			}
		}
	}

	/* Try to fetch the missing DS (from above the cut). */
	const bool has_ta = (qry->zone_cut.trust_anchor != NULL);
	const knot_dname_t *ta_name = (has_ta ? qry->zone_cut.trust_anchor->owner : NULL);
	const bool refetch_ta = !has_ta || !knot_dname_is_equal(qry->zone_cut.name, ta_name);
	if (want_secured && refetch_ta) {
		/* @todo we could fetch the information from the parent cut, but we don't remember that now. */
		struct kr_query *next = kr_rplan_push(rplan, qry, qry->zone_cut.name, qry->sclass, KNOT_RRTYPE_DS);
		if (!next) {
			return KR_STATE_FAIL;
		}
		next->flags |= QUERY_AWAIT_CUT|QUERY_DNSSEC_WANT;
		return KR_STATE_DONE;
	}
	/* Try to fetch the missing DNSKEY (either missing or above the current cut).
	 * Do not fetch if this is a DNSKEY subrequest, to avoid a circular dependency. */
	const bool is_dnskey_subreq = kr_rplan_satisfies(qry, ta_name, KNOT_CLASS_IN, KNOT_RRTYPE_DNSKEY);
	const bool refetch_key = has_ta && (!qry->zone_cut.key || !knot_dname_is_equal(ta_name, qry->zone_cut.key->owner));
	if (want_secured && refetch_key && !is_dnskey_subreq) {
		struct kr_query *next = zone_cut_subreq(rplan, qry, ta_name, KNOT_RRTYPE_DNSKEY);
		if (!next) {
			return KR_STATE_FAIL;
		}
		return KR_STATE_DONE;
	}

	return KR_STATE_PRODUCE;
}

/** @internal Check current zone cut status and credibility, spawn subrequests if needed. */
static int zone_cut_check(struct kr_request *request, struct kr_query *qry, knot_pkt_t *packet)
{
	/* Stub mode, just forward and do not solve the cut. */
	if (qry->flags & QUERY_STUB) {
		return KR_STATE_PRODUCE;
	}

	if (!(qry->flags & QUERY_AWAIT_CUT)) {
		/* The query was resolved from cache.
		 * Spawn DS/DNSKEY requests if needed and exit. */
		return trust_chain_check(request, qry);
	}

	/* The query wasn't resolved from cache,
	 * so now is the time to look up the closest zone cut from cache. */
	struct kr_cache *cache = &request->ctx->cache;
	if (!kr_cache_is_open(cache)) {
		int ret = kr_zonecut_set_sbelt(request->ctx, &qry->zone_cut);
		if (ret != 0) {
			return KR_STATE_FAIL;
		}
		VERBOSE_MSG(qry, "=> using root hints\n");
		qry->flags &= ~QUERY_AWAIT_CUT;
		return KR_STATE_DONE;
	}

	const knot_dname_t *requested_name = qry->sname;
	/* If at/below the parent zone cut, start from its encloser.
	 * This covers the case when we get to a dead end
	 * (and need glue from the parent), or a DS refetch. */
	if (qry->parent) {
		const knot_dname_t *parent = qry->parent->zone_cut.name;
		if (parent[0] != '\0' && knot_dname_in(parent, qry->sname)) {
			requested_name = knot_wire_next_label(parent, NULL);
		}
	} else if ((qry->stype == KNOT_RRTYPE_DS) && (qry->sname[0] != '\0')) {
		/* If this is an explicit DS query, start from the encloser too. */
		requested_name = knot_wire_next_label(requested_name, NULL);
	}

	int state = KR_STATE_FAIL;
	do {
		state = ns_fetch_cut(qry, requested_name, request, packet);
		if (state == KR_STATE_DONE || state == KR_STATE_FAIL) {
			return state;
		} else if (state == KR_STATE_CONSUME) {
			requested_name = knot_wire_next_label(requested_name, NULL);
		}
	} while (state == KR_STATE_CONSUME);

	/* Update the minimized QNAME if the zone cut changed. */
	if (qry->zone_cut.name[0] != '\0' && !(qry->flags & QUERY_NO_MINIMIZE)) {
		if (kr_make_query(qry, packet) != 0) {
			return KR_STATE_FAIL;
		}
	}
	qry->flags &= ~QUERY_AWAIT_CUT;

	/* Check the trust chain. */
	return trust_chain_check(request, qry);
}

int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *type, knot_pkt_t *packet)
{
	struct kr_rplan *rplan = &request->rplan;
	unsigned ns_election_iter = 0;

	/* No query left for resolution. */
	if (kr_rplan_empty(rplan)) {
		return KR_STATE_FAIL;
	}
	/* If we have deferred answers, resume them. */
	struct kr_query *qry = array_tail(rplan->pending);
	if (qry->deferred != NULL) {
		/* @todo: Refactoring validator, check the trust chain before resuming. */
		switch (trust_chain_check(request, qry)) {
		case KR_STATE_FAIL: return KR_STATE_FAIL;
		case KR_STATE_DONE: return KR_STATE_PRODUCE;
		default: break;
		}
		VERBOSE_MSG(qry, "=> resuming yielded answer\n");
		struct kr_layer_pickle *pickle = qry->deferred;
		request->state = KR_STATE_YIELD;
		set_yield(&request->answ_selected, qry->uid, false);
		set_yield(&request->auth_selected, qry->uid, false);
		RESUME_LAYERS(layer_id(request, pickle->api), request, qry, consume, pickle->pkt);
		if (request->state != KR_STATE_YIELD) {
			/* No new deferred answers, take the next one. */
			qry->deferred = pickle->next;
		}
	} else {
		/* The caller may want to always track the zone cut, even if the answer is cached.
		 * This is normally not required and incurs extra cache lookups for cached answers. */
		if (qry->flags & QUERY_ALWAYS_CUT) {
			switch (zone_cut_check(request, qry, packet)) {
			case KR_STATE_FAIL: return KR_STATE_FAIL;
			case KR_STATE_DONE: return KR_STATE_PRODUCE;
			default: break;
			}
		}
		/* Resolve the current query and produce a dependent query, or finish. */
		request->state = KR_STATE_PRODUCE;
		ITERATE_LAYERS(request, qry, produce, packet);
		if (request->state != KR_STATE_FAIL && knot_wire_get_qr(packet->wire)) {
			/* Produced an answer, consume it. */
			qry->secret = 0;
			request->state = KR_STATE_CONSUME;
			ITERATE_LAYERS(request, qry, consume, packet);
		}
	}
	switch (request->state) {
	case KR_STATE_FAIL: return request->state;
	case KR_STATE_CONSUME: break;
	case KR_STATE_DONE:
	default: /* Current query is done. */
		if (qry->flags & QUERY_RESOLVED && request->state != KR_STATE_YIELD) {
			kr_rplan_pop(rplan, qry);
		}
		ITERATE_LAYERS(request, qry, reset);
		return kr_rplan_empty(rplan) ? KR_STATE_DONE : KR_STATE_PRODUCE;
	}

	/* This query has RD=0 or is ANY, stop here. */
	if (qry->stype == KNOT_RRTYPE_ANY || !knot_wire_get_rd(request->answer->wire)) {
		VERBOSE_MSG(qry, "=> qtype is ANY or RD=0, bail out\n");
		return KR_STATE_FAIL;
	}

	/* Update the zone cut, spawn new subrequests. */
	if (!(qry->flags & QUERY_STUB)) {
		int state = zone_cut_check(request, qry, packet);
		switch (state) {
		case KR_STATE_FAIL: return KR_STATE_FAIL;
		case KR_STATE_DONE: return KR_STATE_PRODUCE;
		default: break;
		}
	}

ns_election:

	/* If the query has already selected an NS and is waiting for an IPv4/IPv6 record,
	 * elect the best address only, otherwise elect a completely new NS.
	 */
	if (++ns_election_iter >= KR_ITER_LIMIT) {
		VERBOSE_MSG(qry, "=> couldn't converge NS selection, bail out\n");
		return KR_STATE_FAIL;
	}

	const bool retry = (qry->flags & (QUERY_TCP|QUERY_STUB|QUERY_BADCOOKIE_AGAIN));
	if (qry->flags & (QUERY_AWAIT_IPV4|QUERY_AWAIT_IPV6)) {
		kr_nsrep_elect_addr(qry, request->ctx);
	} else if (!qry->ns.name || !retry) { /* Keep the NS when requerying/stub/badcookie. */
		/* The root DNSKEY must be fetched from the hints to avoid a chicken-and-egg problem. */
		if (qry->sname[0] == '\0' && qry->stype == KNOT_RRTYPE_DNSKEY) {
			kr_zonecut_set_sbelt(request->ctx, &qry->zone_cut);
			qry->flags |= QUERY_NO_THROTTLE; /* Pick even bad SBELT servers. */
		}
		kr_nsrep_elect(qry, request->ctx);
		if (qry->ns.score > KR_NS_MAX_SCORE) {
			VERBOSE_MSG(qry, "=> no valid NS left\n");
			ITERATE_LAYERS(request, qry, reset);
			kr_rplan_pop(rplan, qry);
			return KR_STATE_PRODUCE;
		}
	}

	/* Resolve address records. */
	if (qry->ns.addr[0].ip.sa_family == AF_UNSPEC) {
		int ret = ns_resolve_addr(qry, request);
		if (ret != 0) {
			qry->flags &= ~(QUERY_AWAIT_IPV6|QUERY_AWAIT_IPV4|QUERY_TCP);
			qry->ns.name = NULL;
			goto ns_election; /* Must try a different NS. */
		}
		ITERATE_LAYERS(request, qry, reset);
		return KR_STATE_PRODUCE;
	}

	/* Randomize the query case (unless in safe mode or turned off). */
	qry->secret = (qry->flags & (QUERY_SAFEMODE | QUERY_NO_0X20))
			? 0 : kr_rand_uint(UINT32_MAX);
	knot_dname_t *qname_raw = (knot_dname_t *)knot_pkt_qname(packet);
	randomized_qname_case(qname_raw, qry->secret);

	/*
	 * The outgoing query is going to be finalised when calling
	 * kr_resolve_checkout().
	 */

	gettimeofday(&qry->timestamp, NULL);
	*dst = &qry->ns.addr[0].ip;
	*type = (qry->flags & QUERY_TCP) ? SOCK_STREAM : SOCK_DGRAM;
	return request->state;
}

#if defined(ENABLE_COOKIES)
/** Update DNS cookie data in the packet. */
static bool outbound_request_update_cookies(struct kr_request *req,
                                            const struct sockaddr *src,
                                            const struct sockaddr *dst)
{
	assert(req);

	/* RFC 7873 section 4.1 requires the server address to be known. */
	if (!dst) {
		return false;
	}

	struct kr_cookie_settings *clnt_sett = &req->ctx->cookie_ctx.clnt;

	/* Cookies disabled or the packet has no EDNS section. */
	if (!clnt_sett->enabled) {
		return true;
	}

	/*
	 * RFC 7873 section 4.1 recommends using the client address as well.
	 * The matter is also discussed in section 6.
	 */

	kr_request_put_cookie(&clnt_sett->current, req->ctx->cache_cookie,
	                      src, dst, req);

	return true;
}
#endif /* defined(ENABLE_COOKIES) */

int kr_resolve_checkout(struct kr_request *request, struct sockaddr *src,
                        struct sockaddr *dst, int type, knot_pkt_t *packet)
{
	/* @todo: Update documentation if this function becomes approved. */

	struct kr_rplan *rplan = &request->rplan;

	if (knot_wire_get_qr(packet->wire) != 0) {
		return kr_ok();
	}

	/* No query left for resolution. */
	if (kr_rplan_empty(rplan)) {
		return kr_error(EINVAL);
	}
	struct kr_query *qry = array_tail(rplan->pending);

#if defined(ENABLE_COOKIES)
	/* Update DNS cookies in the request. */
	if (type == SOCK_DGRAM) { /* @todo: Add cookies also over TCP? */
		/*
		 * The actual server IP address is needed before generating the
		 * actual cookie. If we don't know the server address, then we
		 * also don't know the actual cookie size.
		 */
		if (!outbound_request_update_cookies(request, src, dst)) {
			return kr_error(EINVAL);
		}
	}
#endif /* defined(ENABLE_COOKIES) */

	int ret = query_finalize(request, qry, packet);
	if (ret != 0) {
		return kr_error(EINVAL);
	}

	WITH_VERBOSE {
		char qname_str[KNOT_DNAME_MAXLEN], zonecut_str[KNOT_DNAME_MAXLEN], ns_str[INET6_ADDRSTRLEN], type_str[16];
		knot_dname_to_str(qname_str, knot_pkt_qname(packet), sizeof(qname_str));
		knot_dname_to_str(zonecut_str, qry->zone_cut.name, sizeof(zonecut_str));
		knot_rrtype_to_string(knot_pkt_qtype(packet), type_str, sizeof(type_str));
		for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
			struct sockaddr *addr = &qry->ns.addr[i].ip;
			if (addr->sa_family == AF_UNSPEC) {
				break;
			}
			if (!kr_inaddr_equal(dst, addr)) {
				continue;
			}
			inet_ntop(addr->sa_family, kr_nsrep_inaddr(qry->ns.addr[i]), ns_str, sizeof(ns_str));
			VERBOSE_MSG(qry, "=> querying: '%s' score: %u zone cut: '%s' m12n: '%s' type: '%s' proto: '%s'\n",
				ns_str, qry->ns.score, zonecut_str, qname_str, type_str, (qry->flags & QUERY_TCP) ? "tcp" : "udp");
			break;
		}
	}

	return kr_ok();
}

int kr_resolve_finish(struct kr_request *request, int state)
{
#ifndef NOVERBOSELOG
	struct kr_rplan *rplan = &request->rplan;
#endif
	/* Finalize the answer. */
	if (answer_finalize(request, state) != 0) {
		state = KR_STATE_FAIL;
	}
	/* Error during processing means an internal failure. */
	if (state != KR_STATE_DONE) {
		knot_pkt_t *answer = request->answer;
		if (knot_wire_get_rcode(answer->wire) == KNOT_RCODE_NOERROR) {
			knot_wire_set_rcode(answer->wire, KNOT_RCODE_SERVFAIL);
		}
	}

	request->state = state;
	ITERATE_LAYERS(request, NULL, finish);
	VERBOSE_MSG(NULL, "finished: %d, queries: %zu, mempool: %zu B\n",
	          request->state, rplan->resolved.len, (size_t) mp_total_size(request->pool.ctx));
	return KR_STATE_DONE;
}

struct kr_rplan *kr_resolve_plan(struct kr_request *request)
{
	if (request) {
		return &request->rplan;
	}
	return NULL;
}

knot_mm_t *kr_resolve_pool(struct kr_request *request)
{
	if (request) {
		return &request->pool;
	}
	return NULL;
}

#undef VERBOSE_MSG
