| /* |
| * CDDL HEADER START |
| * |
| * The contents of this file are subject to the terms of the |
| * Common Development and Distribution License (the "License"). |
| * You may not use this file except in compliance with the License. |
| * |
| * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
| * or http://www.opensolaris.org/os/licensing. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * |
| * When distributing Covered Code, include this CDDL HEADER in each |
| * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
| * If applicable, add the following below this CDDL HEADER, with the |
| * fields enclosed by brackets "[]" replaced with your own identifying |
| * information: Portions Copyright [yyyy] [name of copyright owner] |
| * |
| * CDDL HEADER END |
| */ |
| /* |
| * Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
| * Use is subject to license terms. |
| */ |
| /* Copyright (c) 1990 Mentat Inc. */ |
| |
| #pragma ident "%Z%%M% %I% %E% SMI" |
| |
| #include <sys/types.h> |
| #include <sys/stream.h> |
| #include <sys/dlpi.h> |
| #include <sys/stropts.h> |
| #include <sys/sysmacros.h> |
| #include <sys/strsubr.h> |
| #include <sys/strlog.h> |
| #include <sys/strsun.h> |
| #include <sys/zone.h> |
| #define _SUN_TPI_VERSION 2 |
| #include <sys/tihdr.h> |
| #include <sys/xti_inet.h> |
| #include <sys/ddi.h> |
| #include <sys/sunddi.h> |
| #include <sys/cmn_err.h> |
| #include <sys/debug.h> |
| #include <sys/kobj.h> |
| #include <sys/modctl.h> |
| #include <sys/atomic.h> |
| #include <sys/policy.h> |
| #include <sys/priv.h> |
| |
| #include <sys/systm.h> |
| #include <sys/param.h> |
| #include <sys/kmem.h> |
| #include <sys/sdt.h> |
| #include <sys/socket.h> |
| #include <sys/vtrace.h> |
| #include <sys/isa_defs.h> |
| #include <net/if.h> |
| #include <net/if_arp.h> |
| #include <net/route.h> |
| #include <sys/sockio.h> |
| #include <netinet/in.h> |
| #include <net/if_dl.h> |
| |
| #include <inet/common.h> |
| #include <inet/mi.h> |
| #include <inet/mib2.h> |
| #include <inet/nd.h> |
| #include <inet/arp.h> |
| #include <inet/snmpcom.h> |
| #include <inet/kstatcom.h> |
| |
| #include <netinet/igmp_var.h> |
| #include <netinet/ip6.h> |
| #include <netinet/icmp6.h> |
| #include <netinet/sctp.h> |
| |
| #include <inet/ip.h> |
| #include <inet/ip_impl.h> |
| #include <inet/ip6.h> |
| #include <inet/ip6_asp.h> |
| #include <inet/tcp.h> |
| #include <inet/tcp_impl.h> |
| #include <inet/ip_multi.h> |
| #include <inet/ip_if.h> |
| #include <inet/ip_ire.h> |
| #include <inet/ip_ftable.h> |
| #include <inet/ip_rts.h> |
| #include <inet/optcom.h> |
| #include <inet/ip_ndp.h> |
| #include <inet/ip_listutils.h> |
| #include <netinet/igmp.h> |
| #include <netinet/ip_mroute.h> |
| #include <inet/ipp_common.h> |
| |
| #include <net/pfkeyv2.h> |
| #include <inet/ipsec_info.h> |
| #include <inet/sadb.h> |
| #include <inet/ipsec_impl.h> |
| #include <sys/iphada.h> |
| #include <inet/tun.h> |
| #include <inet/ipdrop.h> |
| #include <inet/ip_netinfo.h> |
| |
| #include <sys/ethernet.h> |
| #include <net/if_types.h> |
| #include <sys/cpuvar.h> |
| |
| #include <ipp/ipp.h> |
| #include <ipp/ipp_impl.h> |
| #include <ipp/ipgpc/ipgpc.h> |
| |
| #include <sys/multidata.h> |
| #include <sys/pattr.h> |
| |
| #include <inet/ipclassifier.h> |
| #include <inet/sctp_ip.h> |
| #include <inet/sctp/sctp_impl.h> |
| #include <inet/udp_impl.h> |
| #include <sys/sunddi.h> |
| |
| #include <sys/tsol/label.h> |
| #include <sys/tsol/tnet.h> |
| |
| #include <rpc/pmap_prot.h> |
| |
| /* |
| * Values for squeue switch: |
| * IP_SQUEUE_ENTER_NODRAIN: squeue_enter_nodrain |
| * IP_SQUEUE_ENTER: squeue_enter |
| * IP_SQUEUE_FILL: squeue_fill |
| * |
| * ip_squeue_enter holds the switch; its initial value is the literal 2. |
| * NOTE(review): the numeric values of the IP_SQUEUE_* constants are defined |
| * outside this chunk, so which routine 2 selects must be confirmed there. |
| */ |
| int ip_squeue_enter = 2; |
| squeue_func_t ip_input_proc; /* no initializer here; presumably chosen from ip_squeue_enter -- confirm */ |
| /* |
| * IP statistics. |
| * |
| * IP_STAT(x) bumps the 64-bit counter named x in ip_statistics by one and |
| * IP_STAT_UPDATE(x, n) adds n to it. Both are plain C read-modify-write |
| * expressions on value.ui64; the macros themselves take no lock and use no |
| * atomic primitive. |
| * |
| * SET_BPREV_FLAG(x) converts x to an mblk_t pointer by casting it through |
| * uintptr_t. |
| * NOTE(review): the name suggests the result is stored in an mblk's b_prev |
| * field as a flag; the users are outside this chunk -- confirm. |
| */ |
| #define IP_STAT(x) (ip_statistics.x.value.ui64++) |
| #define IP_STAT_UPDATE(x, n) (ip_statistics.x.value.ui64 += (n)) |
| #define SET_BPREV_FLAG(x) ((mblk_t *)(uintptr_t)(x)) |
| /* |
| * Layout of the IP kstat counters: one kstat_named_t per statistic. The |
| * members are filled in positionally by the ip_statistics initializer, so |
| * the member order here and the entry order there must be kept in step. |
| */ |
| typedef struct ip_stat { |
| kstat_named_t ipsec_fanout_proto; |
| kstat_named_t ip_udp_fannorm; |
| kstat_named_t ip_udp_fanmb; |
| kstat_named_t ip_udp_fanothers; |
| kstat_named_t ip_udp_fast_path; |
| kstat_named_t ip_udp_slow_path; |
| kstat_named_t ip_udp_input_err; |
| kstat_named_t ip_tcppullup; |
| kstat_named_t ip_tcpoptions; |
| kstat_named_t ip_multipkttcp; |
| kstat_named_t ip_tcp_fast_path; |
| kstat_named_t ip_tcp_slow_path; |
| kstat_named_t ip_tcp_input_error; |
| kstat_named_t ip_db_ref; |
| kstat_named_t ip_notaligned1; |
| kstat_named_t ip_notaligned2; |
| kstat_named_t ip_multimblk3; |
| kstat_named_t ip_multimblk4; |
| kstat_named_t ip_ipoptions; |
| kstat_named_t ip_classify_fail; |
| kstat_named_t ip_opt; |
| kstat_named_t ip_udp_rput_local; |
| kstat_named_t ipsec_proto_ahesp; |
| kstat_named_t ip_conn_flputbq; |
| kstat_named_t ip_conn_walk_drain; |
| kstat_named_t ip_out_sw_cksum; |
| kstat_named_t ip_in_sw_cksum; |
| kstat_named_t ip_trash_ire_reclaim_calls; |
| kstat_named_t ip_trash_ire_reclaim_success; |
| kstat_named_t ip_ire_arp_timer_expired; |
| kstat_named_t ip_ire_redirect_timer_expired; |
| kstat_named_t ip_ire_pmtu_timer_expired; |
| kstat_named_t ip_input_multi_squeue; |
| kstat_named_t ip_tcp_in_full_hw_cksum_err; |
| kstat_named_t ip_tcp_in_part_hw_cksum_err; |
| kstat_named_t ip_tcp_in_sw_cksum_err; |
| kstat_named_t ip_tcp_out_sw_cksum_bytes; |
| kstat_named_t ip_udp_in_full_hw_cksum_err; |
| kstat_named_t ip_udp_in_part_hw_cksum_err; |
| kstat_named_t ip_udp_in_sw_cksum_err; |
| kstat_named_t ip_udp_out_sw_cksum_bytes; |
| kstat_named_t ip_frag_mdt_pkt_out; |
| kstat_named_t ip_frag_mdt_discarded; |
| kstat_named_t ip_frag_mdt_allocfail; |
| kstat_named_t ip_frag_mdt_addpdescfail; |
| kstat_named_t ip_frag_mdt_allocd; |
| } ip_stat_t; |
| /* |
| * The single instance of ip_stat_t that IP_STAT() and IP_STAT_UPDATE() |
| * modify. Each entry is a brace-enclosed pair: the statistic's name string |
| * and KSTAT_DATA_UINT64. The entries are positional, appear in the same |
| * order as the ip_stat_t members, and each name string matches the member |
| * it initializes. |
| */ |
| static ip_stat_t ip_statistics = { |
| { "ipsec_fanout_proto", KSTAT_DATA_UINT64 }, |
| { "ip_udp_fannorm", KSTAT_DATA_UINT64 }, |
| { "ip_udp_fanmb", KSTAT_DATA_UINT64 }, |
| { "ip_udp_fanothers", KSTAT_DATA_UINT64 }, |
| { "ip_udp_fast_path", KSTAT_DATA_UINT64 }, |
| { "ip_udp_slow_path", KSTAT_DATA_UINT64 }, |
| { "ip_udp_input_err", KSTAT_DATA_UINT64 }, |
| { "ip_tcppullup", KSTAT_DATA_UINT64 }, |
| { "ip_tcpoptions", KSTAT_DATA_UINT64 }, |
| { "ip_multipkttcp", KSTAT_DATA_UINT64 }, |
| { "ip_tcp_fast_path", KSTAT_DATA_UINT64 }, |
| { "ip_tcp_slow_path", KSTAT_DATA_UINT64 }, |
| { "ip_tcp_input_error", KSTAT_DATA_UINT64 }, |
| { "ip_db_ref", KSTAT_DATA_UINT64 }, |
| { "ip_notaligned1", KSTAT_DATA_UINT64 }, |
| { "ip_notaligned2", KSTAT_DATA_UINT64 }, |
| { "ip_multimblk3", KSTAT_DATA_UINT64 }, |
| { "ip_multimblk4", KSTAT_DATA_UINT64 }, |
| { "ip_ipoptions", KSTAT_DATA_UINT64 }, |
| { "ip_classify_fail", KSTAT_DATA_UINT64 }, |
| { "ip_opt", KSTAT_DATA_UINT64 }, |
| { "ip_udp_rput_local", KSTAT_DATA_UINT64 }, |
| { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 }, |
| { "ip_conn_flputbq", KSTAT_DATA_UINT64 }, |
| { "ip_conn_walk_drain", KSTAT_DATA_UINT64 }, |
| { "ip_out_sw_cksum", KSTAT_DATA_UINT64 }, |
| { "ip_in_sw_cksum", KSTAT_DATA_UINT64 }, |
| { "ip_trash_ire_reclaim_calls", KSTAT_DATA_UINT64 }, |
| { "ip_trash_ire_reclaim_success", KSTAT_DATA_UINT64 }, |
| { "ip_ire_arp_timer_expired", KSTAT_DATA_UINT64 }, |
| { "ip_ire_redirect_timer_expired", KSTAT_DATA_UINT64 }, |
| { "ip_ire_pmtu_timer_expired", KSTAT_DATA_UINT64 }, |
| { "ip_input_multi_squeue", KSTAT_DATA_UINT64 }, |
| { "ip_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, |
| { "ip_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, |
| { "ip_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, |
| { "ip_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, |
| { "ip_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, |
| { "ip_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, |
| { "ip_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, |
| { "ip_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, |
| { "ip_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, |
| { "ip_frag_mdt_discarded", KSTAT_DATA_UINT64 }, |
| { "ip_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, |
| { "ip_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, |
| { "ip_frag_mdt_allocd", KSTAT_DATA_UINT64 }, |
| }; |
| |
| static kstat_t *ip_kstat; /* kstat handle; starts NULL (static storage), any assignment is outside this chunk */ |
| /* |
| * String constants "tcp6", "tcp", "sctp" and "sctp6", and four major_t |
| * globals named after them. The *_MAJ globals have no initializer here. |
| * NOTE(review): presumably each *_MAJ holds the major number looked up from |
| * the matching name string -- confirm where they are assigned. |
| */ |
| #define TCP6 "tcp6" |
| #define TCP "tcp" |
| #define SCTP "sctp" |
| #define SCTP6 "sctp6" |
| |
| major_t TCP6_MAJ; |
| major_t TCP_MAJ; |
| major_t SCTP_MAJ; |
| major_t SCTP6_MAJ; |
| /* |
| * Global integers with their compiled-in initial values. |
| * NOTE(review): the _ms / _ticks suffixes suggest milliseconds and clock |
| * ticks respectively; the code that reads them is outside this chunk, so |
| * confirm units and meaning there. |
| */ |
| int ip_poll_normal_ms = 100; |
| int ip_poll_normal_ticks = 0; |
| int ip_modclose_ackwait_ms = 3000; |
| |
| /* |
| * A linked list of msgblks, tracked by its two ends. Used by the ip_snmp_ |
| * functions. Available both as struct listptr_s and as listptr_t. |
| */ |
| |
| typedef struct listptr_s { |
| mblk_t *lp_head; /* first msgblk of the list */ |
| mblk_t *lp_tail; /* last msgblk of the list */ |
| } listptr_t; |
| |
| /* |
| * This is used by ip_snmp_get_mib2_ip_route_media and |
| * ip_snmp_get_mib2_ip6_route_media to carry the lists of return data: |
| * one listptr_t (head/tail pair of msgblks) for each table named below. |
| */ |
| typedef struct iproutedata_s { |
| uint_t ird_idx; /* NOTE(review): use not visible in this chunk */ |
| listptr_t ird_route; /* ipRouteEntryTable */ |
| listptr_t ird_netmedia; /* ipNetToMediaEntryTable */ |
| listptr_t ird_attrs; /* ipRouteAttributeTable */ |
| } iproutedata_t; |
| |
| /* |
| * Cluster specific hooks. These should be NULL when booted as a non-cluster |
| * system. |
| */ |
| |
| /* |
| * Hook functions to enable cluster networking. |
| * On non-clustered systems these vectors must always be NULL. |
| * |
| * cl_inet_isclusterwide: hook function to check whether the specified IP |
| * address is a shared IP address in the cluster. It is passed the protocol, |
| * the address family and a pointer to the address bytes (laddrp), and |
| * returns an int. Statically initialized to NULL. |
| * NOTE(review): the meaning of the int result is not visible in this chunk; |
| * presumably non-zero means the address is cluster-wide -- confirm against |
| * callers. |
| */ |
| int (*cl_inet_isclusterwide)(uint8_t protocol, |
| sa_family_t addr_family, uint8_t *laddrp) = NULL; |
| |
| /* |
| * cl_inet_ipident: hook function to generate a cluster-wide IP fragment |
| * identifier. It is passed the protocol, the address family and two address |
| * pointers (laddrp and faddrp), and returns the identifier as a uint32_t. |
| * Statically initialized to NULL. |
| * NOTE(review): laddrp/faddrp are presumably the local and foreign (remote) |
| * addresses of the packet -- confirm against callers. |
| */ |
| uint32_t (*cl_inet_ipident)(uint8_t protocol, sa_family_t addr_family, |
| uint8_t *laddrp, uint8_t *faddrp) = NULL; |
| |
| /* |
| * Synchronization notes: |
| * |
| * IP is a fully D_MP STREAMS module/driver. Thus it does not depend on any |
| * MT level protection given by STREAMS. IP uses a combination of its own |
| * internal serialization mechanism and standard Solaris locking techniques. |
| * The internal serialization is per phyint (no IPMP) or per IPMP group. |
| * This is used to serialize plumbing operations, IPMP operations, certain |
| * multicast operations, most set ioctls, igmp/mld timers etc. |
| * |
| * Plumbing is a long sequence of operations involving message |
| * exchanges between IP, ARP and device drivers. Many set ioctls are typically |
| * involved in plumbing operations. A natural model is to serialize these |
| * ioctls one per ill. For example plumbing of hme0 and qfe0 can go on in |
| * parallel without any interference. But various set ioctls on hme0 are best |
| * serialized. However if the system uses IPMP, the operations are easier if |
| * they are serialized on a per IPMP group basis since IPMP operations |
| * happen across ill's of a group. Thus the lowest common denominator is to |
| * serialize most set ioctls, multicast join/leave operations, IPMP operations |
| * igmp/mld timer operations, and processing of DLPI control messages received |
| * from drivers on a per IPMP group basis. If the system does not employ |
| * IPMP the serialization is on a per phyint basis. This serialization is |
| * provided by the ipsq_t and primitives operating on this. Details can |
| * be found in ip_if.c above the core primitives operating on ipsq_t. |
| * |
| * Lookups of an ipif or ill by a thread return a refheld ipif / ill. |
| * Similarly, lookup of an ire by a thread also returns a refheld ire. |
| * In addition ipif's and ill's referenced by the ire are also indirectly |
| * refheld. Thus no ipif or ill can vanish nor can critical parameters like |
| * the ipif's address or netmask change as long as an ipif is refheld |
| * directly or indirectly. For example an SIOCLIFADDR ioctl that changes the |
| * address of an ipif has to go through the ipsq_t. This ensures that only |
| * 1 such exclusive operation proceeds at any time on the ipif. It then |
| * deletes all ires associated with this ipif, and waits for all refcnts |
| * associated with this ipif to come down to zero. The address is changed |
| * only after the ipif has been quiesced. Then the ipif is brought up again. |
| * More details are described above the comment in ip_sioctl_flags. |
| * |
| * Packet processing is based mostly on IREs and is fully multi-threaded |
| * using standard Solaris MT techniques. |
| * |
| * There are explicit locks in IP to handle: |
| * - The ip_g_head list maintained by mi_open_link() and friends. |
| * |
| * - The reassembly data structures (one lock per hash bucket) |
| * |
| * - conn_lock is meant to protect conn_t fields. The fields actually |
| * protected by conn_lock are documented in the conn_t definition. |
| * |
| * - ire_lock to protect some of the fields of the ire, IRE tables |
| * (one lock per hash bucket). Refer to ip_ire.c for details. |
| * |
| * - ndp_g_lock and nce_lock for protecting NCEs. |
| * |
| * - ill_lock protects fields of the ill and ipif. Details in ip.h |
| * |
| * - ill_g_lock: This is a global reader/writer lock. Protects the following |
| * * The AVL tree based global multi list of all ills. |
| * * The linked list of all ipifs of an ill |
| * * The <ill-ipsq> mapping |
| * * The ipsq->ipsq_phyint_list threaded by phyint_ipsq_next |
| * * The illgroup list threaded by ill_group_next. |
| * * <ill-phyint> association |
| * Insertion/deletion of an ill in the system, insertion/deletion of an ipif |
| * into an ill, changing the <ill-ipsq> mapping of an ill, insertion/deletion |
| * of an ill into the illgrp list, changing the <ill-phyint> assoc of an ill |
| * will all have to hold the ill_g_lock as writer for the actual duration |
| * of the insertion/deletion/change. More details about the <ill-ipsq> mapping |
| * may be found in the IPMP section. |
| * |
| * - ill_lock: This is a per ill mutex. |
| * It protects some members of the ill and is documented below. |
| * It also protects the <ill-ipsq> mapping |
| * It also protects the illgroup list threaded by ill_group_next. |
| * It also protects the <ill-phyint> assoc. |
| * It also protects the list of ipifs hanging off the ill. |
| * |
| * - ipsq_lock: This is a per ipsq_t mutex lock. |
| * This protects all the other members of the ipsq struct except |
| * ipsq_refs and ipsq_phyint_list which are protected by ill_g_lock |
| * |
| * - illgrp_lock: This is a per ill_group mutex lock. |
| * The only thing it protects is the illgrp_ill_schednext member of ill_group |
| * which dictates which is the next ill in an ill_group that is to be chosen |
| * for sending outgoing packets, through creation of an IRE_CACHE that |
| * references this ill. |
| * |
| * - phyint_lock: This is a per phyint mutex lock. Protects just the |
| * phyint_flags |
| * |
| * - ip_g_nd_lock: This is a global reader/writer lock. |
| * Any call to nd_load to load a new parameter to the ND table must hold the |
| * lock as writer. ND_GET/ND_SET routines that read the ND table hold the lock |
| * as reader. |
| * |
| * - ip_addr_avail_lock: This is used to ensure the uniqueness of IP addresses. |
| * This lock is held in ipif_up_done so that marking the ipif IPIF_UP and the |
| * uniqueness check are done atomically. |
| * |
| * - ipsec_capab_ills_lock: This readers/writer lock protects the global |
| * lists of IPsec capable ills (ipsec_capab_ills_{ah,esp}). It is taken |
| * as a writer when adding or deleting elements from these lists, and |
| * as a reader when walking these lists to send a SADB update to the |
| * IPsec capable ills. |
| * |
| * - ill_g_usesrc_lock: This readers/writer lock protects the usesrc |
| * group list linked by ill_usesrc_grp_next. It also protects the |
| * ill_usesrc_ifindex field. It is taken as a writer when a member of the |
| * group is being added or deleted. This lock is taken as a reader when |
| * walking the list/group(eg: to get the number of members in a usesrc group). |
| * Note, it is only necessary to take this lock if the ill_usesrc_grp_next |
| * field is changing state i.e from NULL to non-NULL or vice-versa. For |
| * example, it is not necessary to take this lock in the initial portion |
| * of ip_sioctl_slifusesrc or at all in ip_sioctl_groupname and |
| * ip_sioctl_flags since these operations are executed exclusively and |
| * that ensures that the "usesrc group state" cannot change. The "usesrc |
| * group state" change can happen only in the latter part of |
| * ip_sioctl_slifusesrc and in ill_delete. |
| * |
| * Changing <ill-phyint>, <ill-ipsq>, <ill-illgroup> associations. |
| * |
| * To change the <ill-phyint> association, the ill_g_lock must be held |
| * as writer, and the ill_locks of both the v4 and v6 instance of the ill |
| * must be held. |
| * |
| * To change the <ill-ipsq> association the ill_g_lock must be held as writer |
| * and the ill_lock of the ill in question must be held. |
| * |
| * To change the <ill-illgroup> association the ill_g_lock must be held as |
| * writer and the ill_lock of the ill in question must be held. |
| * |
| * To add or delete an ipif from the list of ipifs hanging off the ill, |
| * ill_g_lock (writer) and ill_lock must be held and the thread must be |
| * a writer on the associated ipsq. |
| * |
| * To add or delete an ill to the system, the ill_g_lock must be held as |
| * writer and the thread must be a writer on the associated ipsq. |
| * |
| * To add or delete an ilm to an ill, the ill_lock must be held and the thread |
| * must be a writer on the associated ipsq. |
| * |
| * Lock hierarchy |
| * |
| * Some lock hierarchy scenarios are listed below. |
| * |
| * ill_g_lock -> conn_lock -> ill_lock -> ipsq_lock |
| * ill_g_lock -> illgrp_lock -> ill_lock |
| * ill_g_lock -> ill_lock(s) -> phyint_lock |
| * ill_g_lock -> ndp_g_lock -> ill_lock -> nce_lock |
| * ill_g_lock -> ip_addr_avail_lock |
| * conn_lock -> irb_lock -> ill_lock -> ire_lock |
| * ill_g_lock -> ip_g_nd_lock |
| * |
| * When more than 1 ill lock is needed to be held, all ill lock addresses |
| * are sorted on address and locked starting from highest addressed lock |
| * downward. |
| * |
| * Mobile-IP scenarios |
| * |
| * irb_lock -> ill_lock -> ire_mrtun_lock |
| * irb_lock -> ill_lock -> ire_srcif_table_lock |
| * |
| * IPsec scenarios |
| * |
| * ipsa_lock -> ill_g_lock -> ill_lock |
| * ipsec_capab_ills_lock -> ill_g_lock -> ill_lock |
| * ipsec_capab_ills_lock -> ipsa_lock |
| * ill_g_usesrc_lock -> ill_g_lock -> ill_lock |
| * |
| * Trusted Solaris scenarios |
| * |
| * igsa_lock -> gcgrp_rwlock -> gcgrp_lock |
| * igsa_lock -> gcdb_lock |
| * gcgrp_rwlock -> ire_lock |
| * gcgrp_rwlock -> gcdb_lock |
| * |
| * |
| * Routing/forwarding table locking notes: |
| * |
| * Lock acquisition order: Radix tree lock, irb_lock. |
| * Requirements: |
| * i. Walker must not hold any locks during the walker callback. |
| * ii Walker must not see a truncated tree during the walk because of any node |
| * deletion. |
| * iii Existing code assumes ire_bucket is valid if it is non-null and is used |
| * in many places in the code to walk the irb list. Thus even if all the |
| * ires in a bucket have been deleted, we still can't free the radix node |
| * until the ires have actually been inactive'd (freed). |
| * |
| * Tree traversal - Need to hold the global tree lock in read mode. |
| * Before dropping the global tree lock, need to increment either the |
| * ire_refcnt or the irb_refcnt to ensure that the radix node can't be deleted. |
| * |
| * Tree add - Need to hold the global tree lock in write mode to add a |
| * radix node. To prevent the node from being deleted, increment the |
| * irb_refcnt, after the node is added to the tree. The ire itself is |
| * added later while holding the irb_lock, but not the tree lock. |
| * |
| * Tree delete - Need to hold the global tree lock and irb_lock in write mode. |
| * All associated ires must be inactive (i.e. freed), and irb_refcnt |
| * must be zero. |
| * |
| * Walker - Increment irb_refcnt before calling the walker callback. Hold the |
| * global tree lock (read mode) for traversal. |
| * |
| * IPSEC notes : |
| * |
| * IP interacts with the IPSEC code (AH/ESP) by tagging a M_CTL message |
| * in front of the actual packet. For outbound datagrams, the M_CTL |
| * contains a ipsec_out_t (defined in ipsec_info.h), which has the |
| * information used by the IPSEC code for applying the right level of |
| * protection. The information initialized by IP in the ipsec_out_t |
| * is determined by the per-socket policy or global policy in the system. |
| * For inbound datagrams, the M_CTL contains a ipsec_in_t (defined in |
| * ipsec_info.h) which starts out with nothing in it. It gets filled |
| * with the right information if it goes through the AH/ESP code, which |
| * happens if the incoming packet is secure. The information initialized |
| * by AH/ESP is later used by IP (during fanouts to ULP) to see whether |
| * the policy requirements of the per-socket policy or global policy |
| * are met or not. |
| * |
| * If there is both per-socket policy (set using setsockopt) and there |
| * is also global policy match for the 5 tuples of the socket, |
| * ipsec_override_policy() makes the decision of which one to use. |
| * |
| * For fully connected sockets i.e dst, src [addr, port] is known, |
| * conn_policy_cached is set indicating that policy has been cached. |
| * conn_in_enforce_policy may or may not be set depending on whether |
| * there is a global policy match or per-socket policy match. |
| * Policy inheriting happens in ip_bind during the ipa_conn_t bind. |
| * Once the right policy is set on the conn_t, policy cannot change for |
| * this socket. This makes life simpler for TCP (UDP ?) where |
| * re-transmissions go out with the same policy. For symmetry, policy |
| * is cached for fully connected UDP sockets also. Thus if policy is cached, |
| * it also implies that policy is latched i.e policy cannot change |
| * on these sockets. As we have the right policy on the conn, we don't |
| * have to look up global policy for every outbound and inbound datagram, |
| * which serves as an optimization. Note that a global policy change |
| * does not affect fully connected sockets if they have policy. If fully |
| * connected sockets do not have any policy associated with them, a global |
| * policy change may affect them. |
| * |
| * IP Flow control notes: |
| * |
| * Non-TCP streams are flow controlled by IP. On the send side, if the packet |
| * cannot be sent down to the driver by IP, because of a canput failure, IP |
| * does a putq on the conn_wq. This will cause ip_wsrv to run on the conn_wq. |
| * ip_wsrv in turn, inserts the conn in a list of conn's that need to be drained |
| * when the flowcontrol condition subsides. Ultimately STREAMS backenables the |
| * ip_wsrv on the IP module, which in turn does a qenable of the conn_wq of the |
| * first conn in the list of conn's to be drained. ip_wsrv on this conn drains |
| * the queued messages, and removes the conn from the drain list, if all |
| * messages were drained. It also qenables the next conn in the drain list to |
| * continue the drain process. |
| * |
| * In reality the drain list is not a single list, but a configurable number |
| * of lists. The ip_wsrv on the IP module, qenables the first conn in each |
| * list. If the ip_wsrv of the next qenabled conn does not run, because the |
| * stream closes, ip_close takes responsibility to qenable the next conn in |
| * the drain list. The directly called ip_wput path always does a putq, if |
| * it cannot putnext. Thus synchronization problems are handled between |
| * ip_wsrv and ip_close. conn_drain_insert and conn_drain_tail are the only |
| * functions that manipulate this drain list. Furthermore conn_drain_insert |
| * is called only from ip_wsrv, and there can be only 1 instance of ip_wsrv |
| * running on a queue at any time. conn_drain_tail can be simultaneously called |
| * from both ip_wsrv and ip_close. |
| * |
| * IPQOS notes: |
| * |
| * IPQoS Policies are applied to packets using IPPF (IP Policy framework) |
| * and IPQoS modules. IPPF includes hooks in IP at different control points |
| * (callout positions) which direct packets to IPQoS modules for policy |
| * processing. Policies, if present, are global. |
| * |
| * The callout positions are located in the following paths: |
| * o local_in (packets destined for this host) |
| * o local_out (packets originating from this host) |
| * o fwd_in (packets forwarded by this m/c - inbound) |
| * o fwd_out (packets forwarded by this m/c - outbound) |
| * Hooks at these callout points can be enabled/disabled using the ndd variable |
| * ip_policy_mask (a bit mask with the 4 LSB indicating the callout positions). |
| * By default all the callout positions are enabled. |
| * |
| * Outbound (local_out) |
| * Hooks are placed in ip_wput_ire and ipsec_out_process. |
| * |
| * Inbound (local_in) |
| * Hooks are placed in ip_proto_input, icmp_inbound, ip_fanout_proto and |
| * TCP and UDP fanout routines. |
| * |
| * Forwarding (in and out) |
| * Hooks are placed in ip_rput_forward and ip_mrtun_forward. |
| * |
| * IP Policy Framework processing (IPPF processing) |
| * Policy processing for a packet is initiated by ip_process, which ascertains |
| * that the classifier (ipgpc) is loaded and configured, failing which the |
| * packet resumes normal processing in IP. If the classifier is present, the |
| * packet is acted upon by one or more IPQoS modules (action instances), per |
| * filters configured in ipgpc and resumes normal IP processing thereafter. |
| * An action instance can drop a packet in course of its processing. |
| * |
| * A boolean variable, ip_policy, is used in all the fanout routines that can |
| * invoke ip_process for a packet. This variable indicates if the packet should |
| * be sent for policy processing. The variable is set to B_TRUE by default, |
| * i.e. when the routines are invoked in the normal ip processing path for a |
| * packet. The two exceptions are ip_wput_local and icmp_inbound_error_fanout; |
| * ip_policy is set to B_FALSE for all the routines called in these two |
| * functions because, in the former case, we don't process loopback traffic |
| * currently while in the latter, the packets have already been processed in |
| * icmp_inbound. |
| * |
| * Zones notes: |
| * |
| * The partitioning rules for networking are as follows: |
| * 1) Packets coming from a zone must have a source address belonging to that |
| * zone. |
| * 2) Packets coming from a zone can only be sent on a physical interface on |
| * which the zone has an IP address. |
| * 3) Between two zones on the same machine, packet delivery is only allowed if |
| * there's a matching route for the destination and zone in the forwarding |
| * table. |
| * 4) The TCP and UDP port spaces are per-zone; that is, two processes in |
| * different zones can bind to the same port with the wildcard address |
| * (INADDR_ANY). |
| * |
| * The granularity of interface partitioning is at the logical interface level. |
| * Therefore, every zone has its own IP addresses, and incoming packets can be |
| * attributed to a zone unambiguously. A logical interface is placed into a zone |
| * using the SIOCSLIFZONE ioctl; this sets the ipif_zoneid field in the ipif_t |
| * structure. Rule (1) is implemented by modifying the source address selection |
| * algorithm so that the list of eligible addresses is filtered based on the |
| * sending process zone. |
| * |
| * The Internet Routing Entries (IREs) are either exclusive to a zone or shared |
| * across all zones, depending on their type. Here is the break-up: |
| * |
| * IRE type Shared/exclusive |
| * -------- ---------------- |
| * IRE_BROADCAST Exclusive |
| * IRE_DEFAULT (default routes) Shared (*) |
| * IRE_LOCAL Exclusive (x) |
| * IRE_LOOPBACK Exclusive |
| * IRE_PREFIX (net routes) Shared (*) |
| * IRE_CACHE Exclusive |
| * IRE_IF_NORESOLVER (interface routes) Exclusive |
| * IRE_IF_RESOLVER (interface routes) Exclusive |
| * IRE_HOST (host routes) Shared (*) |
| * |
| * (*) A zone can only use a default or off-subnet route if the gateway is |
| * directly reachable from the zone, that is, if the gateway's address matches |
| * one of the zone's logical interfaces. |
| * |
| * (x) IRE_LOCAL are handled a bit differently, since for all other entries |
| * in ire_ctable and IRE_INTERFACE, ire_src_addr is what can be used as source |
| * when sending packets using the IRE. For IRE_LOCAL ire_src_addr is the IP |
| * address of the zone itself (the destination). Since IRE_LOCAL is used |
| * for communication between zones, ip_wput_ire has special logic to set |
| * the right source address when sending using an IRE_LOCAL. |
| * |
| * Furthermore, when ip_restrict_interzone_loopback is set (the default), |
| * ire_cache_lookup restricts loopback using an IRE_LOCAL |
| * between zones to the case when L2 would have conceptually looped the packet |
| * back, i.e. the loopback which is required since neither Ethernet drivers |
| * nor Ethernet hardware loops them back. This is the case when the normal |
| * routes (ignoring IREs with different zoneids) would send out the packet on |
| * the same ill (or ill group) as the ill with which the IRE_LOCAL is |
| * associated. |
| * |
| * Multiple zones can share a common broadcast address; typically all zones |
| * share the 255.255.255.255 address. Incoming as well as locally originated |
| * broadcast packets must be dispatched to all the zones on the broadcast |
| * network. For directed broadcasts (e.g. 10.16.72.255) this is not trivial |
| * since some zones may not be on the 10.16.72/24 network. To handle this, each |
| * zone has its own set of IRE_BROADCAST entries; then, broadcast packets are |
| * sent to every zone that has an IRE_BROADCAST entry for the destination |
| * address on the input ill, see conn_wantpacket(). |
| * |
| * Applications in different zones can join the same multicast group address. |
| * For IPv4, group memberships are per-logical interface, so they're already |
| * inherently part of a zone. For IPv6, group memberships are per-physical |
| * interface, so we distinguish IPv6 group memberships based on group address, |
| * interface and zoneid. In both cases, received multicast packets are sent to |
| * every zone for which a group membership entry exists. On IPv6 we need to |
| * check that the target zone still has an address on the receiving physical |
| * interface; it could have been removed since the application issued the |
| * IPV6_JOIN_GROUP. |
| */ |
| |
| /* |
| * Squeue Fanout flags: |
| * 0: No fanout. |
| * 1: Fanout across all squeues |
| * Declared boolean_t and initialized to 0, i.e. no fanout by default. |
| */ |
| boolean_t ip_squeue_fanout = 0; |
| |
| /* |
| * Maximum dups allowed per packet. Initial value is 10. |
| * NOTE(review): given the "frag" in the name, "dups" presumably means |
| * duplicate fragments seen for one packet; the check itself is outside |
| * this chunk -- confirm. |
| */ |
| uint_t ip_max_frag_dups = 10; |
| /* |
| * True when the ipha_version_and_hdr_length field of *ipha equals |
| * IP_SIMPLE_HDR_VERSION. |
| * NOTE(review): IP_SIMPLE_HDR_VERSION is defined elsewhere; presumably it |
| * encodes IPv4 with a minimum-size, option-free header -- confirm. |
| */ |
| #define IS_SIMPLE_IPH(ipha) \ |
| ((ipha)->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION) |
| |
| /* RFC1122 Conformance: IP forwarding defaults to IP_FORWARD_NEVER. */ |
| #define IP_FORWARD_DEFAULT IP_FORWARD_NEVER |
| /* Alias for LIFNAMSIZ; presumably the longest ill name allowed -- confirm. */ |
| #define ILL_MAX_NAMELEN LIFNAMSIZ |
| /* |
| * Forward declarations. Prototypes marked static have internal linkage; |
| * the unmarked ones (ip_dlpi_alloc, ip_dot_addr, ip_carve_mp, ip_close, |
| * ip_massage_options, ip_net_mask, ip_newroute, ip_nv_lookup and ip_rput) |
| * are externally visible. Most parameters are unnamed, so the meaning of |
| * each argument has to be read from the definitions, which are outside |
| * this chunk. |
| */ |
| static int conn_set_held_ipif(conn_t *, ipif_t **, ipif_t *); |
| |
| static mblk_t *ip_wput_attach_llhdr(mblk_t *, ire_t *, ip_proc_t, uint32_t); |
| static void ip_ipsec_out_prepend(mblk_t *, mblk_t *, ill_t *); |
| |
| static void icmp_frag_needed(queue_t *, mblk_t *, int, zoneid_t); |
| static void icmp_inbound(queue_t *, mblk_t *, boolean_t, ill_t *, int, |
| uint32_t, boolean_t, boolean_t, ill_t *, zoneid_t); |
| static ipaddr_t icmp_get_nexthop_addr(ipha_t *, ill_t *, zoneid_t, mblk_t *mp); |
| static boolean_t icmp_inbound_too_big(icmph_t *, ipha_t *, ill_t *, zoneid_t, |
| mblk_t *, int); |
| static void icmp_inbound_error_fanout(queue_t *, ill_t *, mblk_t *, |
| icmph_t *, ipha_t *, int, int, boolean_t, boolean_t, |
| ill_t *, zoneid_t); |
| static void icmp_options_update(ipha_t *); |
| static void icmp_param_problem(queue_t *, mblk_t *, uint8_t, zoneid_t); |
| static void icmp_pkt(queue_t *, mblk_t *, void *, size_t, boolean_t, |
| zoneid_t zoneid); |
| static mblk_t *icmp_pkt_err_ok(mblk_t *); |
| static void icmp_redirect(mblk_t *); |
| static void icmp_send_redirect(queue_t *, mblk_t *, ipaddr_t); |
| |
| static void ip_arp_news(queue_t *, mblk_t *); |
| static boolean_t ip_bind_insert_ire(mblk_t *, ire_t *, iulp_t *); |
| mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t); |
| char *ip_dot_addr(ipaddr_t, char *); |
| mblk_t *ip_carve_mp(mblk_t **, ssize_t); |
| int ip_close(queue_t *, int); |
| static char *ip_dot_saddr(uchar_t *, char *); |
| static void ip_fanout_proto(queue_t *, mblk_t *, ill_t *, ipha_t *, uint_t, |
| boolean_t, boolean_t, ill_t *, zoneid_t); |
| static void ip_fanout_tcp(queue_t *, mblk_t *, ill_t *, ipha_t *, uint_t, |
| boolean_t, boolean_t, zoneid_t); |
| static void ip_fanout_udp(queue_t *, mblk_t *, ill_t *, ipha_t *, uint32_t, |
| boolean_t, uint_t, boolean_t, boolean_t, ill_t *, zoneid_t); |
| static void ip_lrput(queue_t *, mblk_t *); |
| ipaddr_t ip_massage_options(ipha_t *); |
| static void ip_mrtun_forward(ire_t *, ill_t *, mblk_t *); |
| ipaddr_t ip_net_mask(ipaddr_t); |
| void ip_newroute(queue_t *, mblk_t *, ipaddr_t, ill_t *, conn_t *, |
| zoneid_t); |
| static void ip_newroute_ipif(queue_t *, mblk_t *, ipif_t *, ipaddr_t, |
| conn_t *, uint32_t, zoneid_t, ip_opt_info_t *); |
| char *ip_nv_lookup(nv_t *, int); |
| static boolean_t ip_check_for_ipsec_opt(queue_t *, mblk_t *); |
| static int ip_param_get(queue_t *, mblk_t *, caddr_t, cred_t *); |
| static int ip_param_generic_get(queue_t *, mblk_t *, caddr_t, cred_t *); |
| static boolean_t ip_param_register(ipparam_t *, size_t, ipndp_t *, |
| size_t); |
| static int ip_param_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *); |
| void ip_rput(queue_t *, mblk_t *); |
| static void ip_rput_dlpi_writer(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, |
| void *dummy_arg); |
| void ip_rput_forward(ire_t *, ipha_t *, mblk_t *, ill_t *); |
| static int ip_rput_forward_options(mblk_t *, ipha_t *, ire_t *); |
| static boolean_t ip_rput_local_options(queue_t *, mblk_t *, ipha_t *, |
| ire_t *); |
| static boolean_t ip_rput_multimblk_ipoptions(queue_t *, ill_t *, |
| mblk_t *, ipha_t **, ipaddr_t *); |
| static int ip_rput_options(queue_t *, mblk_t *, ipha_t *, ipaddr_t *); |
| static boolean_t ip_rput_fragment(queue_t *, mblk_t **, ipha_t *, uint32_t *, |
| uint16_t *); |
| int ip_snmp_get(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_ip(queue_t *, mblk_t *, |
| mib2_ipIfStatsEntry_t *); |
| static mblk_t *ip_snmp_get_mib2_ip_traffic_stats(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_ip6(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_icmp(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_icmp6(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_igmp(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_multi(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_ip_addr(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_ip6_addr(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_ip_group_mem(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_ip6_group_mem(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_ip_group_src(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_ip6_group_src(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_virt_multi(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_multi_rtable(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_ip_route_media(queue_t *, mblk_t *); |
| static mblk_t *ip_snmp_get_mib2_ip6_route_media(queue_t *, mblk_t *); |
| static void ip_snmp_get2_v4(ire_t *, iproutedata_t *); |
| static void ip_snmp_get2_v6_route(ire_t *, iproutedata_t *); |
| static int ip_snmp_get2_v6_media(nce_t *, iproutedata_t *); |
| int ip_snmp_set(queue_t *, int, int, uchar_t *, int); |
| static boolean_t ip_source_routed(ipha_t *); |
| static boolean_t ip_source_route_included(ipha_t *); |
| |
| static void ip_wput_frag(ire_t *, mblk_t *, ip_pkt_t, uint32_t, uint32_t, |
| zoneid_t); |
| static mblk_t *ip_wput_frag_copyhdr(uchar_t *, int, int); |
| static void ip_wput_local_options(ipha_t *); |
| static int ip_wput_options(queue_t *, mblk_t *, ipha_t *, boolean_t, |
| zoneid_t); |
| |
| static void conn_drain_init(void); |
| static void conn_drain_fini(void); |
| static void conn_drain_tail(conn_t *connp, boolean_t closing); |
| |
| static void conn_walk_drain(void); |
| static void conn_walk_fanout_table(connf_t *, uint_t, pfv_t, void *, |
| zoneid_t); |
| |
| static boolean_t conn_wantpacket(conn_t *, ill_t *, ipha_t *, int, |
| zoneid_t); |
| static void ip_arp_done(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, |
| void *dummy_arg); |
| |
| static int ip_forward_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *); |
| |
| static int ip_multirt_apply_membership(int (*fn)(conn_t *, boolean_t, |
| ipaddr_t, ipaddr_t, uint_t *, mcast_record_t, ipaddr_t, mblk_t *), ire_t *, |
| conn_t *, boolean_t, ipaddr_t, mcast_record_t, ipaddr_t, mblk_t *); |
| static void ip_multirt_bad_mtu(ire_t *, uint32_t); |
| |
| static int ip_cgtp_filter_get(queue_t *, mblk_t *, caddr_t, cred_t *); |
| static int ip_cgtp_filter_set(queue_t *, mblk_t *, char *, |
| caddr_t, cred_t *); |
| extern int ip_squeue_bind_set(queue_t *q, mblk_t *mp, char *value, |
| caddr_t cp, cred_t *cr); |
| extern int ip_squeue_profile_set(queue_t *, mblk_t *, char *, caddr_t, |
| cred_t *); |
| static int ip_input_proc_set(queue_t *q, mblk_t *mp, char *value, |
| caddr_t cp, cred_t *cr); |
| static int ip_int_set(queue_t *, mblk_t *, char *, caddr_t, |
| cred_t *); |
| static squeue_func_t ip_squeue_switch(int); |
| |
| static void ip_kstat_init(void); |
| static void ip_kstat_fini(void); |
| static int ip_kstat_update(kstat_t *kp, int rw); |
| static void icmp_kstat_init(void); |
| static void icmp_kstat_fini(void); |
| static int icmp_kstat_update(kstat_t *kp, int rw); |
| |
| static int ip_conn_report(queue_t *, mblk_t *, caddr_t, cred_t *); |
| |
| static mblk_t *ip_tcp_input(mblk_t *, ipha_t *, ill_t *, boolean_t, |
| ire_t *, mblk_t *, uint_t, queue_t *, ill_rx_ring_t *); |
| |
| static void ip_rput_process_forward(queue_t *, mblk_t *, ire_t *, |
| ipha_t *, ill_t *, boolean_t); |
| |
| timeout_id_t ip_ire_expire_id; /* Timeout id of the IRE expiration timer. */ |
| static clock_t ip_ire_arp_time_elapsed; /* Time since IRE cache last flushed */ |
| static clock_t ip_ire_rd_time_elapsed; /* Time since redirect IREs last flushed */ |
| static clock_t ip_ire_pmtu_time_elapsed; /* Time since path mtu increase */ |
| |
| ipaddr_t ip_g_all_ones = IP_HOST_MASK; |
| clock_t icmp_pkt_err_last = 0; /* Time since last icmp_pkt_err */ |
| uint_t icmp_pkt_err_sent = 0; /* Number of packets sent in burst */ |
| |
| /* How long, in seconds, we allow frags to hang around. */ |
| #define IP_FRAG_TIMEOUT 60 |
| |
| time_t ip_g_frag_timeout = IP_FRAG_TIMEOUT; /* in seconds */ |
| clock_t ip_g_frag_timo_ms = IP_FRAG_TIMEOUT * 1000; /* same default, in milliseconds */ |
| |
| /* |
| * Threshold which determines whether MDT should be used when |
| * generating IP fragments; payload size must be greater than |
| * this threshold for MDT to take place. |
| */ |
| #define IP_WPUT_FRAG_MDT_MIN 32768 |
| |
| int ip_wput_frag_mdt_min = IP_WPUT_FRAG_MDT_MIN; /* variable initialized from the threshold above */ |
| |
| /* ip_g_head is protected by ip_mi_lock */ |
| static void *ip_g_head; /* Instance Data List Head */ |
| kmutex_t ip_mi_lock; /* Lock for list of instances */ |
| |
| /* Only modified during _init and _fini thus no locking is needed. */ |
| caddr_t ip_g_nd; /* Named Dispatch List Head */ |
| |
| |
| static long ip_rput_pullups; /* readable and settable via ND as "ip_rput_pullups" */ |
| int dohwcksum = 1; /* use h/w cksum if supported by the hardware */ |
| |
| vmem_t *ip_minor_arena; |
| |
| /* |
| * MIB-2 stuff for SNMP (both IP and ICMP) |
| */ |
| mib2_ipIfStatsEntry_t ip_mib; |
| mib2_icmp_t icmp_mib; |
| |
| #ifdef DEBUG |
| uint32_t ipsechw_debug = 0; /* only defined in DEBUG builds */ |
| #endif |
| |
| kstat_t *ip_mibkp; /* kstat exporting ip_mib data */ |
| kstat_t *icmp_mibkp; /* kstat exporting icmp_mib data */ |
| |
| uint_t loopback_packets = 0; |
| |
| /* |
| * Multirouting/CGTP stuff |
| */ |
| cgtp_filter_ops_t *ip_cgtp_filter_ops; /* CGTP hooks */ |
| int ip_cgtp_filter_rev = CGTP_FILTER_REV; /* CGTP hooks version */ |
| boolean_t ip_cgtp_filter; /* Enable/disable CGTP hooks; ND name "ip_cgtp_filter" */ |
| /* Interval (in ms) between consecutive 'bad MTU' warnings */ |
| hrtime_t ip_multirt_log_interval = 1000; |
| /* Time since last warning issued. */ |
| static hrtime_t multirt_bad_mtu_last_time = 0; |
| |
| kmutex_t ip_trash_timer_lock; |
| krwlock_t ip_g_nd_lock; /* NOTE(review): what this lock covers is not visible here; the ip_g_nd comment says no locking is needed -- confirm */ |
| |
| /* |
| * XXX following really should only be in a header. Would need more |
| * header and .c clean up first. |
| */ |
| extern optdb_obj_t ip_opt_obj; |
| |
| ulong_t ip_squeue_enter_unbound = 0; |
| |
| /* |
| * Named Dispatch Parameter Table. |
| * All of these are alterable, within the min/max values given, at run time. |
| * |
| * Each entry is { min, max, value, name }; the value column holds the |
| * initial setting. The last entry ("ip6_drop_inbound_icmpv6") exists only |
| * in DEBUG builds. |
| * NOTE(review): consumers presumably reach individual entries by array |
| * position through ip_param_arr, so the order should be treated as fixed -- |
| * confirm against the header that defines the accessors. |
| */ |
| static ipparam_t lcl_param_arr[] = { |
| /* min max value name */ |
| { 0, 1, 0, "ip_respond_to_address_mask_broadcast"}, |
| { 0, 1, 1, "ip_respond_to_echo_broadcast"}, |
| { 0, 1, 1, "ip_respond_to_echo_multicast"}, |
| { 0, 1, 0, "ip_respond_to_timestamp"}, |
| { 0, 1, 0, "ip_respond_to_timestamp_broadcast"}, |
| { 0, 1, 1, "ip_send_redirects"}, |
| { 0, 1, 0, "ip_forward_directed_broadcasts"}, |
| { 0, 10, 0, "ip_debug"}, |
| { 0, 10, 0, "ip_mrtdebug"}, |
| { 5000, 999999999, 60000, "ip_ire_timer_interval" }, |
| { 60000, 999999999, 1200000, "ip_ire_arp_interval" }, |
| { 60000, 999999999, 60000, "ip_ire_redirect_interval" }, |
| { 1, 255, 255, "ip_def_ttl" }, |
| { 0, 1, 0, "ip_forward_src_routed"}, |
| { 0, 256, 32, "ip_wroff_extra" }, |
| { 5000, 999999999, 600000, "ip_ire_pathmtu_interval" }, |
| { 8, 65536, 64, "ip_icmp_return_data_bytes" }, |
| { 0, 1, 1, "ip_path_mtu_discovery" }, |
| { 0, 240, 30, "ip_ignore_delete_time" }, |
| { 0, 1, 0, "ip_ignore_redirect" }, |
| { 0, 1, 1, "ip_output_queue" }, |
| { 1, 254, 1, "ip_broadcast_ttl" }, |
| { 0, 99999, 100, "ip_icmp_err_interval" }, |
| { 1, 99999, 10, "ip_icmp_err_burst" }, |
| { 0, 999999999, 1000000, "ip_reass_queue_bytes" }, |
| { 0, 1, 0, "ip_strict_dst_multihoming" }, |
| { 1, MAX_ADDRS_PER_IF, 256, "ip_addrs_per_if"}, |
| { 0, 1, 0, "ipsec_override_persocket_policy" }, |
| { 0, 1, 1, "icmp_accept_clear_messages" }, |
| { 0, 1, 1, "igmp_accept_clear_messages" }, |
| { 2, 999999999, ND_DELAY_FIRST_PROBE_TIME, |
| "ip_ndp_delay_first_probe_time"}, |
| { 1, 999999999, ND_MAX_UNICAST_SOLICIT, |
| "ip_ndp_max_unicast_solicit"}, |
| { 1, 255, IPV6_MAX_HOPS, "ip6_def_hops" }, |
| { 8, IPV6_MIN_MTU, IPV6_MIN_MTU, "ip6_icmp_return_data_bytes" }, |
| { 0, 1, 0, "ip6_forward_src_routed"}, |
| { 0, 1, 1, "ip6_respond_to_echo_multicast"}, |
| { 0, 1, 1, "ip6_send_redirects"}, |
| { 0, 1, 0, "ip6_ignore_redirect" }, |
| { 0, 1, 0, "ip6_strict_dst_multihoming" }, |
| |
| { 1, 8, 3, "ip_ire_reclaim_fraction" }, |
| |
| { 0, 999999, 1000, "ipsec_policy_log_interval" }, |
| |
| { 0, 1, 1, "pim_accept_clear_messages" }, |
| { 1000, 20000, 2000, "ip_ndp_unsolicit_interval" }, |
| { 1, 20, 3, "ip_ndp_unsolicit_count" }, |
| { 0, 1, 1, "ip6_ignore_home_address_opt" }, |
| { 0, 15, 0, "ip_policy_mask" }, |
| { 1000, 60000, 1000, "ip_multirt_resolution_interval" }, |
| { 0, 255, 1, "ip_multirt_ttl" }, |
| { 0, 1, 1, "ip_multidata_outbound" }, |
| { 0, 3600000, 300000, "ip_ndp_defense_interval" }, |
| { 0, 999999, 60*60*24, "ip_max_temp_idle" }, |
| { 0, 1000, 1, "ip_max_temp_defend" }, |
| { 0, 1000, 3, "ip_max_defend" }, |
| { 0, 999999, 30, "ip_defend_interval" }, |
| { 0, 3600000, 300000, "ip_dup_recovery" }, |
| { 0, 1, 1, "ip_restrict_interzone_loopback" }, |
| { 0, 1, 1, "ip_lso_outbound" }, |
| #ifdef DEBUG |
| { 0, 1, 0, "ip6_drop_inbound_icmpv6" }, |
| #endif |
| }; |
| |
| ipparam_t *ip_param_arr = lcl_param_arr; /* non-static pointer to the table above */ |
| |
| /* |
| * Extended NDP table. |
| * Each entry is { get routine, set routine, data pointer, name }. |
| * Entries whose set routine and data pointer are both NULL supply only a |
| * get routine; the remaining entries pass the address of the variable they |
| * expose as the data pointer. |
| */ |
| static ipndp_t lcl_ndp_arr[] = { |
| /* getf setf data name */ |
| { ip_param_generic_get, ip_forward_set, (caddr_t)&ip_g_forward, |
| "ip_forwarding" }, |
| { ip_param_generic_get, ip_forward_set, (caddr_t)&ipv6_forward, |
| "ip6_forwarding" }, |
| { ip_ill_report, NULL, NULL, |
| "ip_ill_status" }, |
| { ip_ipif_report, NULL, NULL, |
| "ip_ipif_status" }, |
| { ip_ire_report, NULL, NULL, |
| "ipv4_ire_status" }, |
| { ip_ire_report_mrtun, NULL, NULL, |
| "ipv4_mrtun_ire_status" }, |
| { ip_ire_report_srcif, NULL, NULL, |
| "ipv4_srcif_ire_status" }, |
| { ip_ire_report_v6, NULL, NULL, |
| "ipv6_ire_status" }, |
| { ip_conn_report, NULL, NULL, |
| "ip_conn_status" }, |
| { nd_get_long, nd_set_long, (caddr_t)&ip_rput_pullups, |
| "ip_rput_pullups" }, |
| { ndp_report, NULL, NULL, |
| "ip_ndp_cache_report" }, |
| { ip_srcid_report, NULL, NULL, |
| "ip_srcid_status" }, |
| { ip_param_generic_get, ip_squeue_profile_set, |
| (caddr_t)&ip_squeue_profile, "ip_squeue_profile" }, |
| { ip_param_generic_get, ip_squeue_bind_set, |
| (caddr_t)&ip_squeue_bind, "ip_squeue_bind" }, |
| { ip_param_generic_get, ip_input_proc_set, |
| (caddr_t)&ip_squeue_enter, "ip_squeue_enter" }, |
| { ip_param_generic_get, ip_int_set, |
| (caddr_t)&ip_squeue_fanout, "ip_squeue_fanout" }, |
| { ip_cgtp_filter_get, ip_cgtp_filter_set, (caddr_t)&ip_cgtp_filter, |
| "ip_cgtp_filter" }, |
| { ip_param_generic_get, ip_int_set, |
| (caddr_t)&ip_soft_rings_cnt, "ip_soft_rings_cnt" } |
| }; |
| |
| /* |
| * ip_g_forward controls IP forwarding. It takes two values: |
| * 0: IP_FORWARD_NEVER Don't forward packets ever. |
| * 1: IP_FORWARD_ALWAYS Forward packets for elsewhere. |
| * |
| * RFC1122 says there must be a configuration switch to control forwarding, |
| * but that the default MUST be to not forward packets ever. Implicit |
| * control based on configuration of multiple interfaces MUST NOT be |
| * implemented (Section 3.1). SunOS 4.1 did provide the "automatic" capability |
| * and, in fact, it was the default. That capability is now provided in the |
| * /etc/rc2.d/S69inet script. |
| * |
| * The variable is exposed through the extended NDP table as |
| * "ip_forwarding", with ip_forward_set as its set routine. |
| */ |
| int ip_g_forward = IP_FORWARD_DEFAULT; |
| |
| /* It also has an IPv6 counterpart, exposed as "ip6_forwarding". */ |
| |
| int ipv6_forward = IP_FORWARD_DEFAULT; |
| |
| /* |
| * Table of IP ioctls encoding the various properties of the ioctl and |
| * indexed based on the last byte of the ioctl command. Occasionally there |
| * is a clash, and there is more than 1 ioctl with the same last byte. |
| * In such a case 1 ioctl is encoded in the ndx table and the remaining |
| * ioctls are encoded in the misc table. An entry in the ndx table is |
| * retrieved by indexing on the last byte of the ioctl command and comparing |
| * the ioctl command with the value in the ndx table. In the event of a |
| * mismatch the misc table is then searched sequentially for the desired |
| * ioctl command. |
| * |
| * Entry: <command> <copyin_size> <flags> <cmd_type> <function> <restart_func> |
| * |
| * Column conventions followed by every entry: |
| * - <flags> is an OR of IPI_* bits, or 0 when none apply. |
| * - <cmd_type> is exactly one of IF_CMD, LIF_CMD, TUN_CMD or MISC_CMD |
| * (0 in placeholder slots). IPI_* bits belong in <flags> only and must |
| * never be OR-ed into this column. |
| * - <restart_func> is NULL when the entry names no restart routine. |
| * - A slot with no ioctl holds an IPI_DONTCARE placeholder, which keeps the |
| * numeric comment in front of each entry equal to its array index. |
| */ |
| ip_ioctl_cmd_t ip_ndx_ioctl_table[] = { |
| /* 000 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 001 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 002 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 003 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 004 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 005 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 006 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 007 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 008 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 009 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| |
| /* 010 */ { SIOCADDRT, sizeof (struct rtentry), IPI_PRIV, |
| MISC_CMD, ip_siocaddrt, NULL }, |
| /* 011 */ { SIOCDELRT, sizeof (struct rtentry), IPI_PRIV, |
| MISC_CMD, ip_siocdelrt, NULL }, |
| |
| /* 012 */ { SIOCSIFADDR, sizeof (struct ifreq), IPI_PRIV | IPI_WR, |
| IF_CMD, ip_sioctl_addr, ip_sioctl_addr_restart }, |
| /* 013 */ { SIOCGIFADDR, sizeof (struct ifreq), IPI_GET_CMD | IPI_REPL, |
| IF_CMD, ip_sioctl_get_addr, NULL }, |
| |
| /* 014 */ { SIOCSIFDSTADDR, sizeof (struct ifreq), IPI_PRIV | IPI_WR, |
| IF_CMD, ip_sioctl_dstaddr, ip_sioctl_dstaddr_restart }, |
| /* 015 */ { SIOCGIFDSTADDR, sizeof (struct ifreq), |
| IPI_GET_CMD | IPI_REPL, |
| IF_CMD, ip_sioctl_get_dstaddr, NULL }, |
| |
| /* 016 */ { SIOCSIFFLAGS, sizeof (struct ifreq), |
| IPI_PRIV | IPI_WR | IPI_REPL, |
| IF_CMD, ip_sioctl_flags, ip_sioctl_flags_restart }, |
| /* 017 */ { SIOCGIFFLAGS, sizeof (struct ifreq), |
| IPI_MODOK | IPI_GET_CMD | IPI_REPL, |
| IF_CMD, ip_sioctl_get_flags, NULL }, |
| |
| /* 018 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 019 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| |
| /* copyin size cannot be coded for SIOCGIFCONF */ |
| /* 020 */ { O_SIOCGIFCONF, 0, IPI_GET_CMD | IPI_REPL, |
| MISC_CMD, ip_sioctl_get_ifconf, NULL }, |
| |
| /* 021 */ { SIOCSIFMTU, sizeof (struct ifreq), IPI_PRIV | IPI_WR, |
| IF_CMD, ip_sioctl_mtu, NULL }, |
| /* 022 */ { SIOCGIFMTU, sizeof (struct ifreq), IPI_GET_CMD | IPI_REPL, |
| IF_CMD, ip_sioctl_get_mtu, NULL }, |
| /* 023 */ { SIOCGIFBRDADDR, sizeof (struct ifreq), |
| IPI_GET_CMD | IPI_REPL, |
| IF_CMD, ip_sioctl_get_brdaddr, NULL }, |
| /* 024 */ { SIOCSIFBRDADDR, sizeof (struct ifreq), IPI_PRIV | IPI_WR, |
| IF_CMD, ip_sioctl_brdaddr, NULL }, |
| /* 025 */ { SIOCGIFNETMASK, sizeof (struct ifreq), |
| IPI_GET_CMD | IPI_REPL, |
| IF_CMD, ip_sioctl_get_netmask, NULL }, |
| /* 026 */ { SIOCSIFNETMASK, sizeof (struct ifreq), IPI_PRIV | IPI_WR, |
| IF_CMD, ip_sioctl_netmask, ip_sioctl_netmask_restart }, |
| /* 027 */ { SIOCGIFMETRIC, sizeof (struct ifreq), |
| IPI_GET_CMD | IPI_REPL, |
| IF_CMD, ip_sioctl_get_metric, NULL }, |
| /* 028 */ { SIOCSIFMETRIC, sizeof (struct ifreq), IPI_PRIV, |
| IF_CMD, ip_sioctl_metric, NULL }, |
| /* 029 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| |
| /* See 166-168 below for extended SIOC*XARP ioctls */ |
| /* 030 */ { SIOCSARP, sizeof (struct arpreq), IPI_PRIV, |
| MISC_CMD, ip_sioctl_arp, NULL }, |
| /* 031 */ { SIOCGARP, sizeof (struct arpreq), IPI_GET_CMD | IPI_REPL, |
| MISC_CMD, ip_sioctl_arp, NULL }, |
| /* 032 */ { SIOCDARP, sizeof (struct arpreq), IPI_PRIV, |
| MISC_CMD, ip_sioctl_arp, NULL }, |
| |
| /* 033 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 034 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 035 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 036 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 037 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 038 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 039 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 040 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 041 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 042 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 043 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 044 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 045 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 046 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 047 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 048 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 049 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 050 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 051 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 052 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 053 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| |
| /* 054 */ { IF_UNITSEL, sizeof (int), IPI_PRIV | IPI_WR | IPI_MODOK, |
| MISC_CMD, if_unitsel, if_unitsel_restart }, |
| |
| /* 055 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 056 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 057 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 058 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 059 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 060 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 061 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 062 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 063 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 064 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 065 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 066 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 067 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 068 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 069 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 070 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 071 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 072 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| |
| /* 073 */ { SIOCSIFNAME, sizeof (struct ifreq), |
| IPI_PRIV | IPI_WR | IPI_MODOK, |
| IF_CMD, ip_sioctl_sifname, NULL }, |
| |
| /* 074 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 075 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 076 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 077 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 078 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 079 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 080 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 081 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 082 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 083 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 084 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 085 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 086 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| |
| /* 087 */ { SIOCGIFNUM, sizeof (int), IPI_GET_CMD | IPI_REPL, |
| MISC_CMD, ip_sioctl_get_ifnum, NULL }, |
| /* 088 */ { SIOCGIFMUXID, sizeof (struct ifreq), IPI_GET_CMD | IPI_REPL, |
| IF_CMD, ip_sioctl_get_muxid, NULL }, |
| /* 089 */ { SIOCSIFMUXID, sizeof (struct ifreq), |
| IPI_PRIV | IPI_WR | IPI_REPL, |
| IF_CMD, ip_sioctl_muxid, NULL }, |
| |
| /* Both if and lif variants share same func */ |
| /* 090 */ { SIOCGIFINDEX, sizeof (struct ifreq), IPI_GET_CMD | IPI_REPL, |
| IF_CMD, ip_sioctl_get_lifindex, NULL }, |
| /* Both if and lif variants share same func */ |
| /* 091 */ { SIOCSIFINDEX, sizeof (struct ifreq), |
| IPI_PRIV | IPI_WR | IPI_REPL, |
| IF_CMD, ip_sioctl_slifindex, NULL }, |
| |
| /* copyin size cannot be coded for SIOCGIFCONF */ |
| /* 092 */ { SIOCGIFCONF, 0, IPI_GET_CMD | IPI_REPL, |
| MISC_CMD, ip_sioctl_get_ifconf, NULL }, |
| /* 093 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 094 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 095 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 096 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 097 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 098 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 099 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 100 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 101 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 102 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 103 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 104 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 105 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 106 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 107 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 108 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 109 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| |
| /* 110 */ { SIOCLIFREMOVEIF, sizeof (struct lifreq), |
| IPI_PRIV | IPI_WR | IPI_REPL, |
| LIF_CMD, ip_sioctl_removeif, |
| ip_sioctl_removeif_restart }, |
| /* 111 */ { SIOCLIFADDIF, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_PRIV | IPI_WR | IPI_REPL, |
| LIF_CMD, ip_sioctl_addif, NULL }, |
| #define SIOCLIFADDR_NDX 112 |
| /* 112 */ { SIOCSLIFADDR, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_addr, ip_sioctl_addr_restart }, |
| /* 113 */ { SIOCGLIFADDR, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_addr, NULL }, |
| /* 114 */ { SIOCSLIFDSTADDR, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_dstaddr, ip_sioctl_dstaddr_restart }, |
| /* 115 */ { SIOCGLIFDSTADDR, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_dstaddr, NULL }, |
| /* 116 */ { SIOCSLIFFLAGS, sizeof (struct lifreq), |
| IPI_PRIV | IPI_WR | IPI_REPL, |
| LIF_CMD, ip_sioctl_flags, ip_sioctl_flags_restart }, |
| /* 117 */ { SIOCGLIFFLAGS, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_MODOK | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_flags, NULL }, |
| |
| /* 118 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 119 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| |
| /* 120 */ { O_SIOCGLIFCONF, 0, IPI_GET_CMD | IPI_REPL, MISC_CMD, |
| ip_sioctl_get_lifconf, NULL }, |
| /* 121 */ { SIOCSLIFMTU, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_mtu, NULL }, |
| /* 122 */ { SIOCGLIFMTU, sizeof (struct lifreq), IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_mtu, NULL }, |
| /* 123 */ { SIOCGLIFBRDADDR, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_brdaddr, NULL }, |
| /* 124 */ { SIOCSLIFBRDADDR, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_brdaddr, NULL }, |
| /* 125 */ { SIOCGLIFNETMASK, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_netmask, NULL }, |
| /* 126 */ { SIOCSLIFNETMASK, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_netmask, ip_sioctl_netmask_restart }, |
| /* 127 */ { SIOCGLIFMETRIC, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_metric, NULL }, |
| /* 128 */ { SIOCSLIFMETRIC, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_metric, NULL }, |
| /* 129 */ { SIOCSLIFNAME, sizeof (struct lifreq), |
| IPI_PRIV | IPI_WR | IPI_MODOK | IPI_REPL, |
| LIF_CMD, ip_sioctl_slifname, |
| ip_sioctl_slifname_restart }, |
| |
| /* 130 */ { SIOCGLIFNUM, sizeof (struct lifnum), IPI_GET_CMD | IPI_REPL, |
| MISC_CMD, ip_sioctl_get_lifnum, NULL }, |
| /* 131 */ { SIOCGLIFMUXID, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_muxid, NULL }, |
| /* 132 */ { SIOCSLIFMUXID, sizeof (struct lifreq), |
| IPI_PRIV | IPI_WR | IPI_REPL, |
| LIF_CMD, ip_sioctl_muxid, NULL }, |
| /* 133 */ { SIOCGLIFINDEX, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_lifindex, NULL }, |
| /* 134 */ { SIOCSLIFINDEX, sizeof (struct lifreq), |
| IPI_PRIV | IPI_WR | IPI_REPL, |
| LIF_CMD, ip_sioctl_slifindex, NULL }, |
| /* 135 */ { SIOCSLIFTOKEN, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_token, NULL }, |
| /* 136 */ { SIOCGLIFTOKEN, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_token, NULL }, |
| /* 137 */ { SIOCSLIFSUBNET, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_subnet, ip_sioctl_subnet_restart }, |
| /* 138 */ { SIOCGLIFSUBNET, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_subnet, NULL }, |
| /* 139 */ { SIOCSLIFLNKINFO, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_lnkinfo, NULL }, |
| |
| /* 140 */ { SIOCGLIFLNKINFO, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_lnkinfo, NULL }, |
| /* 141 */ { SIOCLIFDELND, sizeof (struct lifreq), IPI_PRIV, |
| LIF_CMD, ip_siocdelndp_v6, NULL }, |
| /* 142 */ { SIOCLIFGETND, sizeof (struct lifreq), IPI_GET_CMD, |
| LIF_CMD, ip_siocqueryndp_v6, NULL }, |
| /* 143 */ { SIOCLIFSETND, sizeof (struct lifreq), IPI_PRIV, |
| LIF_CMD, ip_siocsetndp_v6, NULL }, |
| /* 144 */ { SIOCTMYADDR, sizeof (struct sioc_addrreq), IPI_GET_CMD, |
| MISC_CMD, ip_sioctl_tmyaddr, NULL }, |
| /* 145 */ { SIOCTONLINK, sizeof (struct sioc_addrreq), IPI_GET_CMD, |
| MISC_CMD, ip_sioctl_tonlink, NULL }, |
| /* 146 */ { SIOCTMYSITE, sizeof (struct sioc_addrreq), 0, |
| MISC_CMD, ip_sioctl_tmysite, NULL }, |
| /* 147 */ { SIOCGTUNPARAM, sizeof (struct iftun_req), IPI_REPL, |
| TUN_CMD, ip_sioctl_tunparam, NULL }, |
| /* 148 */ { SIOCSTUNPARAM, sizeof (struct iftun_req), |
| IPI_PRIV | IPI_WR, |
| TUN_CMD, ip_sioctl_tunparam, NULL }, |
| |
| /* IPSECioctls handled in ip_sioctl_copyin_setup itself */ |
| /* 149 */ { SIOCFIPSECONFIG, 0, IPI_PRIV, MISC_CMD, NULL, NULL }, |
| /* 150 */ { SIOCSIPSECONFIG, 0, IPI_PRIV, MISC_CMD, NULL, NULL }, |
| /* 151 */ { SIOCDIPSECONFIG, 0, IPI_PRIV, MISC_CMD, NULL, NULL }, |
| /* 152 */ { SIOCLIPSECONFIG, 0, IPI_PRIV, MISC_CMD, NULL, NULL }, |
| |
| /* 153 */ { SIOCLIFFAILOVER, sizeof (struct lifreq), |
| IPI_PRIV | IPI_WR | IPI_REPL, |
| LIF_CMD, ip_sioctl_move, ip_sioctl_move }, |
| /* 154 */ { SIOCLIFFAILBACK, sizeof (struct lifreq), |
| IPI_PRIV | IPI_WR | IPI_REPL, |
| LIF_CMD, ip_sioctl_move, ip_sioctl_move }, |
| /* 155 */ { SIOCSLIFGROUPNAME, sizeof (struct lifreq), |
| IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_groupname, ip_sioctl_groupname }, |
| /* 156 */ { SIOCGLIFGROUPNAME, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_groupname, NULL }, |
| /* 157 */ { SIOCGLIFOINDEX, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_oindex, NULL }, |
| |
| /* Leave 158-160 unused; used to be SIOC*IFARP ioctls */ |
| /* 158 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 159 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 160 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| |
| /* 161 */ { SIOCSLIFOINDEX, sizeof (struct lifreq), IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_slifoindex, NULL }, |
| |
| /* These are handled in ip_sioctl_copyin_setup itself */ |
| /* 162 */ { SIOCGIP6ADDRPOLICY, 0, IPI_NULL_BCONT, |
| MISC_CMD, NULL, NULL }, |
| /* 163 */ { SIOCSIP6ADDRPOLICY, 0, IPI_PRIV | IPI_NULL_BCONT, |
| MISC_CMD, NULL, NULL }, |
| /* 164 */ { SIOCGDSTINFO, 0, IPI_GET_CMD, MISC_CMD, NULL, NULL }, |
| |
| /* 165 */ { SIOCGLIFCONF, 0, IPI_GET_CMD | IPI_REPL, MISC_CMD, |
| ip_sioctl_get_lifconf, NULL }, |
| |
| /* 166 */ { SIOCSXARP, sizeof (struct xarpreq), IPI_PRIV, |
| MISC_CMD, ip_sioctl_xarp, NULL }, |
| /* 167 */ { SIOCGXARP, sizeof (struct xarpreq), IPI_GET_CMD | IPI_REPL, |
| MISC_CMD, ip_sioctl_xarp, NULL }, |
| /* 168 */ { SIOCDXARP, sizeof (struct xarpreq), IPI_PRIV, |
| MISC_CMD, ip_sioctl_xarp, NULL }, |
| |
| /* SIOCPOPSOCKFS is not handled by IP */ |
| /* 169 */ { IPI_DONTCARE /* SIOCPOPSOCKFS */, 0, 0, 0, NULL, NULL }, |
| |
| /* 170 */ { SIOCGLIFZONE, sizeof (struct lifreq), |
| IPI_GET_CMD | IPI_REPL, |
| LIF_CMD, ip_sioctl_get_lifzone, NULL }, |
| /* 171 */ { SIOCSLIFZONE, sizeof (struct lifreq), |
| IPI_PRIV | IPI_WR | IPI_REPL, |
| LIF_CMD, ip_sioctl_slifzone, |
| ip_sioctl_slifzone_restart }, |
| /* 172-174 are SCTP ioctls and not handled by IP */ |
| /* 172 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 173 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 174 */ { IPI_DONTCARE, 0, 0, 0, NULL, NULL }, |
| /* 175 */ { SIOCGLIFUSESRC, sizeof (struct lifreq), |
| IPI_GET_CMD, LIF_CMD, |
| ip_sioctl_get_lifusesrc, NULL }, |
| /* 176 */ { SIOCSLIFUSESRC, sizeof (struct lifreq), |
| IPI_PRIV | IPI_WR, |
| LIF_CMD, ip_sioctl_slifusesrc, |
| NULL }, |
| /* 177 */ { SIOCGLIFSRCOF, 0, IPI_GET_CMD, MISC_CMD, |
| ip_sioctl_get_lifsrcof, NULL }, |
| /* 178 */ { SIOCGMSFILTER, sizeof (struct group_filter), IPI_GET_CMD, |
| MISC_CMD, ip_sioctl_msfilter, NULL }, |
| /* 179 */ { SIOCSMSFILTER, sizeof (struct group_filter), IPI_WR, |
| MISC_CMD, ip_sioctl_msfilter, NULL }, |
| /* 180 */ { SIOCGIPMSFILTER, sizeof (struct ip_msfilter), IPI_GET_CMD, |
| MISC_CMD, ip_sioctl_msfilter, NULL }, |
| /* 181 */ { SIOCSIPMSFILTER, sizeof (struct ip_msfilter), IPI_WR, |
| MISC_CMD, ip_sioctl_msfilter, NULL }, |
| /* 182 */ { SIOCSIPMPFAILBACK, sizeof (int), IPI_PRIV, MISC_CMD, |
| ip_sioctl_set_ipmpfailback, NULL } |
| }; |
| |
| /* Number of slots in ip_ndx_ioctl_table, derived from the array itself. */ |
| int ip_ndx_ioctl_count = |
| sizeof (ip_ndx_ioctl_table) / sizeof (ip_ndx_ioctl_table[0]); |
| |
| /* |
| * Ioctls that are found by sequential search rather than by index (see the |
| * comment above ip_ndx_ioctl_table). Entries use the same layout: |
| * <command> <copyin_size> <flags> <cmd_type> <function> <restart_func> |
| * Every entry spells out all six columns; <restart_func> is NULL when the |
| * entry names no restart routine. |
| */ |
| ip_ioctl_cmd_t ip_misc_ioctl_table[] = { |
| { OSIOCGTUNPARAM, sizeof (struct old_iftun_req), |
| IPI_GET_CMD | IPI_REPL, TUN_CMD, ip_sioctl_tunparam, NULL }, |
| { OSIOCSTUNPARAM, sizeof (struct old_iftun_req), IPI_PRIV | IPI_WR, |
| TUN_CMD, ip_sioctl_tunparam, NULL }, |
| { I_LINK, 0, IPI_PRIV | IPI_WR | IPI_PASS_DOWN, 0, NULL, NULL }, |
| { I_UNLINK, 0, IPI_PRIV | IPI_WR | IPI_PASS_DOWN, 0, NULL, NULL }, |
| { I_PLINK, 0, IPI_PRIV | IPI_WR | IPI_PASS_DOWN, 0, NULL, NULL }, |
| { I_PUNLINK, 0, IPI_PRIV | IPI_WR | IPI_PASS_DOWN, 0, NULL, NULL }, |
| { ND_GET, 0, IPI_PASS_DOWN, 0, NULL, NULL }, |
| { ND_SET, 0, IPI_PRIV | IPI_WR | IPI_PASS_DOWN, 0, NULL, NULL }, |
| { IP_IOCTL, 0, 0, 0, NULL, NULL }, |
| { SIOCGETVIFCNT, sizeof (struct sioc_vif_req), IPI_REPL | IPI_GET_CMD, |
| MISC_CMD, mrt_ioctl, NULL }, |
| { SIOCGETSGCNT, sizeof (struct sioc_sg_req), IPI_REPL | IPI_GET_CMD, |
| MISC_CMD, mrt_ioctl, NULL }, |
| { SIOCGETLSGCNT, sizeof (struct sioc_lsg_req), IPI_REPL | IPI_GET_CMD, |
| MISC_CMD, mrt_ioctl, NULL } |
| }; |
| |
| int ip_misc_ioctl_count = |
| sizeof (ip_misc_ioctl_table) / sizeof (ip_ioctl_cmd_t); |
| |
| static idl_t *conn_drain_list; /* The array of conn drain lists */ |
| static uint_t conn_drain_list_cnt; /* Total count of conn_drain_list */ |
| static int conn_drain_list_index; /* Next drain_list to be used */ |
| int conn_drain_nthreads; /* Number of drainers reqd. */ |
| /* Settable in /etc/system */ |
| uint_t ip_redirect_cnt; /* Num of redirect routes in ftable */ |
| |
| /* Defined in ip_ire.c */ |
| extern uint32_t ip_ire_max_bucket_cnt, ip6_ire_max_bucket_cnt; |
| extern uint32_t ip_ire_min_bucket_cnt, ip6_ire_min_bucket_cnt; |
| extern uint32_t ip_ire_mem_ratio, ip_ire_cpu_ratio; |
| |
| static nv_t ire_nv_arr[] = { |
| { IRE_BROADCAST, "BROADCAST" }, |
| { IRE_LOCAL, "LOCAL" }, |
| { IRE_LOOPBACK, "LOOPBACK" }, |
| { IRE_CACHE, "CACHE" }, |
| { IRE_DEFAULT, "DEFAULT" }, |
| { IRE_PREFIX, "PREFIX" }, |
| { IRE_IF_NORESOLVER, "IF_NORESOL" }, |
| { IRE_IF_RESOLVER, "IF_RESOLV" }, |
| { IRE_HOST, "HOST" }, |
| { 0 } |
| }; |
| |
| nv_t *ire_nv_tbl = ire_nv_arr; |
| |
| /* Defined in ip_if.c, protect the list of IPsec capable ills */ |
| extern krwlock_t ipsec_capab_ills_lock; |
| |
| /* Defined in ip_netinfo.c */ |
| extern ddi_taskq_t *eventq_queue_nic; |
| |
| /* Packet dropper for IP IPsec processing failures */ |
| ipdropper_t ip_dropper; |
| |
| /* Simple ICMP IP Header Template */ |
| static ipha_t icmp_ipha = { |
| IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP |
| }; |
| |
| struct module_info ip_mod_info = { |
| IP_MOD_ID, IP_MOD_NAME, 1, INFPSZ, 65536, 1024 |
| }; |
| |
| /* |
| * Duplicate static symbols within a module confuses mdb; so we avoid the |
| * problem by making the symbols here distinct from those in udp.c. |
| */ |
| |
| static struct qinit iprinit = { |
| (pfi_t)ip_rput, NULL, ip_open, ip_close, NULL, |
| &ip_mod_info |
| }; |
| |
| static struct qinit ipwinit = { |
| (pfi_t)ip_wput, (pfi_t)ip_wsrv, ip_open, ip_close, NULL, |
| &ip_mod_info |
| }; |
| |
| static struct qinit iplrinit = { |
| (pfi_t)ip_lrput, NULL, ip_open, ip_close, NULL, |
| &ip_mod_info |
| }; |
| |
| static struct qinit iplwinit = { |
| (pfi_t)ip_lwput, NULL, ip_open, ip_close, NULL, |
| &ip_mod_info |
| }; |
| |
| struct streamtab ipinfo = { |
| &iprinit, &ipwinit, &iplrinit, &iplwinit |
| }; |
| |
#if defined(DEBUG)
/* Debug-only switch; off by default. */
static boolean_t skip_sctp_cksum = B_FALSE;
#endif	/* DEBUG */
| |
| /* |
| * Prepend the zoneid using an ipsec_out_t for later use by functions like |
| * ip_rput_v6(), ip_output(), etc. If the message |
| * block already has a M_CTL at the front of it, then simply set the zoneid |
| * appropriately. |
| */ |
| mblk_t * |
| ip_prepend_zoneid(mblk_t *mp, zoneid_t zoneid) |
| { |
| mblk_t *first_mp; |
| ipsec_out_t *io; |
| |
| ASSERT(zoneid != ALL_ZONES); |
| if (mp->b_datap->db_type == M_CTL) { |
| io = (ipsec_out_t *)mp->b_rptr; |
| ASSERT(io->ipsec_out_type == IPSEC_OUT); |
| io->ipsec_out_zoneid = zoneid; |
| return (mp); |
| } |
| |
| first_mp = ipsec_alloc_ipsec_out(); |
| if (first_mp == NULL) |
| return (NULL); |
| io = (ipsec_out_t *)first_mp->b_rptr; |
| /* This is not a secure packet */ |
| io->ipsec_out_secure = B_FALSE; |
| io->ipsec_out_zoneid = zoneid; |
| first_mp->b_cont = mp; |
| return (first_mp); |
| } |
| |
| /* |
| * Copy an M_CTL-tagged message, preserving reference counts appropriately. |
| */ |
| mblk_t * |
| ip_copymsg(mblk_t *mp) |
| { |
| mblk_t *nmp; |
| ipsec_info_t *in; |
| |
| if (mp->b_datap->db_type != M_CTL) |
| return (copymsg(mp)); |
| |
| in = (ipsec_info_t *)mp->b_rptr; |
| |
| /* |
| * Note that M_CTL is also used for delivering ICMP error messages |
| * upstream to transport layers. |
| */ |
| if (in->ipsec_info_type != IPSEC_OUT && |
| in->ipsec_info_type != IPSEC_IN) |
| return (copymsg(mp)); |
| |
| nmp = copymsg(mp->b_cont); |
| |
| if (in->ipsec_info_type == IPSEC_OUT) |
| return (ipsec_out_tag(mp, nmp)); |
| else |
| return (ipsec_in_tag(mp, nmp)); |
| } |
| |
| /* Generate an ICMP fragmentation needed message. */ |
| static void |
| icmp_frag_needed(queue_t *q, mblk_t *mp, int mtu, zoneid_t zoneid) |
| { |
| icmph_t icmph; |
| mblk_t *first_mp; |
| boolean_t mctl_present; |
| |
| EXTRACT_PKT_MP(mp, first_mp, mctl_present); |
| |
| if (!(mp = icmp_pkt_err_ok(mp))) { |
| if (mctl_present) |
| freeb(first_mp); |
| return; |
| } |
| |
| bzero(&icmph, sizeof (icmph_t)); |
| icmph.icmph_type = ICMP_DEST_UNREACHABLE; |
| icmph.icmph_code = ICMP_FRAGMENTATION_NEEDED; |
| icmph.icmph_du_mtu = htons((uint16_t)mtu); |
| BUMP_MIB(&icmp_mib, icmpOutFragNeeded); |
| BUMP_MIB(&icmp_mib, icmpOutDestUnreachs); |
| icmp_pkt(q, first_mp, &icmph, sizeof (icmph_t), mctl_present, zoneid); |
| } |
| |
| /* |
| * icmp_inbound deals with ICMP messages in the following ways. |
| * |
| * 1) It needs to send a reply back and possibly delivering it |
| * to the "interested" upper clients. |
| * 2) It needs to send it to the upper clients only. |
| * 3) It needs to change some values in IP only. |
| * 4) It needs to change some values in IP and upper layers e.g TCP. |
| * |
| * We need to accomodate icmp messages coming in clear until we get |
| * everything secure from the wire. If icmp_accept_clear_messages |
| * is zero we check with the global policy and act accordingly. If |
| * it is non-zero, we accept the message without any checks. But |
| * *this does not mean* that this will be delivered to the upper |
| * clients. By accepting we might send replies back, change our MTU |
| * value etc. but delivery to the ULP/clients depends on their policy |
| * dispositions. |
| * |
| * We handle the above 4 cases in the context of IPSEC in the |
| * following way : |
| * |
| * 1) Send the reply back in the same way as the request came in. |
| * If it came in encrypted, it goes out encrypted. If it came in |
| * clear, it goes out in clear. Thus, this will prevent chosen |
| * plain text attack. |
| * 2) The client may or may not expect things to come in secure. |
| * If it comes in secure, the policy constraints are checked |
| * before delivering it to the upper layers. If it comes in |
| * clear, ipsec_inbound_accept_clear will decide whether to |
| * accept this in clear or not. In both the cases, if the returned |
| * message (IP header + 8 bytes) that caused the icmp message has |
| * AH/ESP headers, it is sent up to AH/ESP for validation before |
| * sending up. If there are only 8 bytes of returned message, then |
| * upper client will not be notified. |
| * 3) Check with global policy to see whether it matches the constaints. |
| * But this will be done only if icmp_accept_messages_in_clear is |
| * zero. |
| * 4) If we need to change both in IP and ULP, then the decision taken |
| * while affecting the values in IP and while delivering up to TCP |
| * should be the same. |
| * |
| * There are two cases. |
| * |
| * a) If we reject data at the IP layer (ipsec_check_global_policy() |
| * failed), we will not deliver it to the ULP, even though they |
| * are *willing* to accept in *clear*. This is fine as our global |
| * disposition to icmp messages asks us reject the datagram. |
| * |
| * b) If we accept data at the IP layer (ipsec_check_global_policy() |
| * succeeded or icmp_accept_messages_in_clear is 1), and not able |
| * to deliver it to ULP (policy failed), it can lead to |
| * consistency problems. The cases known at this time are |
| * ICMP_DESTINATION_UNREACHABLE messages with following code |
| * values : |
| * |
| * - ICMP_FRAGMENTATION_NEEDED : IP adapts to the new value |
| * and Upper layer rejects. Then the communication will |
| * come to a stop. This is solved by making similar decisions |
| * at both levels. Currently, when we are unable to deliver |
| * to the Upper Layer (due to policy failures) while IP has |
| * adjusted ire_max_frag, the next outbound datagram would |
| * generate a local ICMP_FRAGMENTATION_NEEDED message - which |
| * will be with the right level of protection. Thus the right |
| * value will be communicated even if we are not able to |
| * communicate when we get from the wire initially. But this |
| * assumes there would be at least one outbound datagram after |
| * IP has adjusted its ire_max_frag value. To make things |
| * simpler, we accept in clear after the validation of |
| * AH/ESP headers. |
| * |
| * - Other ICMP ERRORS : We may not be able to deliver it to the |
| * upper layer depending on the level of protection the upper |
| * layer expects and the disposition in ipsec_inbound_accept_clear(). |
| * ipsec_inbound_accept_clear() decides whether a given ICMP error |
| * should be accepted in clear when the Upper layer expects secure. |
| * Thus the communication may get aborted by some bad ICMP |
| * packets. |
| * |
| * IPQoS Notes: |
| * The only instance when a packet is sent for processing is when there |
| * isn't an ICMP client and if we are interested in it. |
| * If there is a client, IPPF processing will take place in the |
| * ip_fanout_proto routine. |
| * |
| * Zones notes: |
| * The packet is only processed in the context of the specified zone: typically |
| * only this zone will reply to an echo request, and only interested clients in |
| * this zone will receive a copy of the packet. This means that the caller must |
| * call icmp_inbound() for each relevant zone. |
| */ |
| static void |
| icmp_inbound(queue_t *q, mblk_t *mp, boolean_t broadcast, ill_t *ill, |
| int sum_valid, uint32_t sum, boolean_t mctl_present, boolean_t ip_policy, |
| ill_t *recv_ill, zoneid_t zoneid) |
| { |
| icmph_t *icmph; |
| ipha_t *ipha; |
| int iph_hdr_length; |
| int hdr_length; |
| boolean_t interested; |
| uint32_t ts; |
| uchar_t *wptr; |
| ipif_t *ipif; |
| mblk_t *first_mp; |
| ipsec_in_t *ii; |
| ire_t *src_ire; |
| boolean_t onlink; |
| timestruc_t now; |
| uint32_t ill_index; |
| |
| ASSERT(ill != NULL); |
| |
| first_mp = mp; |
| if (mctl_present) { |
| mp = first_mp->b_cont; |
| ASSERT(mp != NULL); |
| } |
| |
| ipha = (ipha_t *)mp->b_rptr; |
| if (icmp_accept_clear_messages == 0) { |
| first_mp = ipsec_check_global_policy(first_mp, NULL, |
| ipha, NULL, mctl_present); |
| if (first_mp == NULL) |
| return; |
| } |
| |
| /* |
| * On a labeled system, we have to check whether the zone itself is |
| * permitted to receive raw traffic. |
| */ |
| if (is_system_labeled()) { |
| if (zoneid == ALL_ZONES) |
| zoneid = tsol_packet_to_zoneid(mp); |
| if (!tsol_can_accept_raw(mp, B_FALSE)) { |
| ip1dbg(("icmp_inbound: zone %d can't receive raw", |
| zoneid)); |
| BUMP_MIB(&icmp_mib, icmpInErrors); |
| freemsg(first_mp); |
| return; |
| } |
| } |
| |
| /* |
| * We have accepted the ICMP message. It means that we will |
| * respond to the packet if needed. It may not be delivered |
| * to the upper client depending on the policy constraints |
| * and the disposition in ipsec_inbound_accept_clear. |
| */ |
| |
| ASSERT(ill != NULL); |
| |
| BUMP_MIB(&icmp_mib, icmpInMsgs); |
| iph_hdr_length = IPH_HDR_LENGTH(ipha); |
| if ((mp->b_wptr - mp->b_rptr) < (iph_hdr_length + ICMPH_SIZE)) { |
| /* Last chance to get real. */ |
| if (!pullupmsg(mp, iph_hdr_length + ICMPH_SIZE)) { |
| BUMP_MIB(&icmp_mib, icmpInErrors); |
| freemsg(first_mp); |
| return; |
| } |
| /* Refresh iph following the pullup. */ |
| ipha = (ipha_t *)mp->b_rptr; |
| } |
| /* ICMP header checksum, including checksum field, should be zero. */ |
| if (sum_valid ? (sum != 0 && sum != 0xFFFF) : |
| IP_CSUM(mp, iph_hdr_length, 0)) { |
| BUMP_MIB(&icmp_mib, icmpInCksumErrs); |
| freemsg(first_mp); |
| return; |
| } |
| /* The IP header will always be a multiple of four bytes */ |
| icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; |
| ip2dbg(("icmp_inbound: type %d code %d\n", icmph->icmph_type, |
| icmph->icmph_code)); |
| wptr = (uchar_t *)icmph + ICMPH_SIZE; |
| /* We will set "interested" to "true" if we want a copy */ |
| interested = B_FALSE; |
| switch (icmph->icmph_type) { |
| case ICMP_ECHO_REPLY: |
| BUMP_MIB(&icmp_mib, icmpInEchoReps); |
| break; |
| case ICMP_DEST_UNREACHABLE: |
| if (icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED) |
| BUMP_MIB(&icmp_mib, icmpInFragNeeded); |
| interested = B_TRUE; /* Pass up to transport */ |
| BUMP_MIB(&icmp_mib, icmpInDestUnreachs); |
| break; |
| case ICMP_SOURCE_QUENCH: |
| interested = B_TRUE; /* Pass up to transport */ |
| BUMP_MIB(&icmp_mib, icmpInSrcQuenchs); |
| break; |
| case ICMP_REDIRECT: |
| if (!ip_ignore_redirect) |
| interested = B_TRUE; |
| BUMP_MIB(&icmp_mib, icmpInRedirects); |
| break; |
| case ICMP_ECHO_REQUEST: |
| /* |
| * Whether to respond to echo requests that come in as IP |
| * broadcasts or as IP multicast is subject to debate |
| * (what isn't?). We aim to please, you pick it. |
| * Default is do it. |
| */ |
| if (!broadcast && !CLASSD(ipha->ipha_dst)) { |
| /* unicast: always respond */ |
| interested = B_TRUE; |
| } else if (CLASSD(ipha->ipha_dst)) { |
| /* multicast: respond based on tunable */ |
| interested = ip_g_resp_to_echo_mcast; |
| } else if (broadcast) { |
| /* broadcast: respond based on tunable */ |
| interested = ip_g_resp_to_echo_bcast; |
| } |
| BUMP_MIB(&icmp_mib, icmpInEchos); |
| break; |
| case ICMP_ROUTER_ADVERTISEMENT: |
| case ICMP_ROUTER_SOLICITATION: |
| break; |
| case ICMP_TIME_EXCEEDED: |
| interested = B_TRUE; /* Pass up to transport */ |
| BUMP_MIB(&icmp_mib, icmpInTimeExcds); |
| break; |
| case ICMP_PARAM_PROBLEM: |
| interested = B_TRUE; /* Pass up to transport */ |
| BUMP_MIB(&icmp_mib, icmpInParmProbs); |
| break; |
| case ICMP_TIME_STAMP_REQUEST: |
| /* Response to Time Stamp Requests is local policy. */ |
| if (ip_g_resp_to_timestamp && |
| /* So is whether to respond if it was an IP broadcast. */ |
| (!broadcast || ip_g_resp_to_timestamp_bcast)) { |
| int tstamp_len = 3 * sizeof (uint32_t); |
| |
| if (wptr + tstamp_len > mp->b_wptr) { |
| if (!pullupmsg(mp, wptr + tstamp_len - |
| mp->b_rptr)) { |
| BUMP_MIB(ill->ill_ip_mib, |
| ipIfStatsInDiscards); |
| freemsg(first_mp); |
| return; |
| } |
| /* Refresh ipha following the pullup. */ |
| ipha = (ipha_t *)mp->b_rptr; |
| icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; |
| wptr = (uchar_t *)icmph + ICMPH_SIZE; |
| } |
| interested = B_TRUE; |
| } |
| BUMP_MIB(&icmp_mib, icmpInTimestamps); |
| break; |
| case ICMP_TIME_STAMP_REPLY: |
| BUMP_MIB(&icmp_mib, icmpInTimestampReps); |
| break; |
| case ICMP_INFO_REQUEST: |
| /* Per RFC 1122 3.2.2.7, ignore this. */ |
| case ICMP_INFO_REPLY: |
| break; |
| case ICMP_ADDRESS_MASK_REQUEST: |
| if ((ip_respond_to_address_mask_broadcast || !broadcast) && |
| /* TODO m_pullup of complete header? */ |
| (mp->b_datap->db_lim - wptr) >= IP_ADDR_LEN) |
| interested = B_TRUE; |
| BUMP_MIB(&icmp_mib, icmpInAddrMasks); |
| break; |
| case ICMP_ADDRESS_MASK_REPLY: |
| BUMP_MIB(&icmp_mib, icmpInAddrMaskReps); |
| break; |
| default: |
| interested = B_TRUE; /* Pass up to transport */ |
| BUMP_MIB(&icmp_mib, icmpInUnknowns); |
| break; |
| } |
| /* See if there is an ICMP client. */ |
| if (ipcl_proto_search(IPPROTO_ICMP) != NULL) { |
| /* If there is an ICMP client and we want one too, copy it. */ |
| mblk_t *first_mp1; |
| |
| if (!interested) { |
| ip_fanout_proto(q, first_mp, ill, ipha, 0, mctl_present, |
| ip_policy, recv_ill, zoneid); |
| return; |
| } |
| first_mp1 = ip_copymsg(first_mp); |
| if (first_mp1 != NULL) { |
| ip_fanout_proto(q, first_mp1, ill, ipha, |
| 0, mctl_present, ip_policy, recv_ill, zoneid); |
| } |
| } else if (!interested) { |
| freemsg(first_mp); |
| return; |
| } else { |
| /* |
| * Initiate policy processing for this packet if ip_policy |
| * is true. |
| */ |
| if (IPP_ENABLED(IPP_LOCAL_IN) && ip_policy) { |
| ill_index = ill->ill_phyint->phyint_ifindex; |
| ip_process(IPP_LOCAL_IN, &mp, ill_index); |
| if (mp == NULL) { |
| if (mctl_present) { |
| freeb(first_mp); |
| } |
| BUMP_MIB(&icmp_mib, icmpInErrors); |
| return; |
| } |
| } |
| } |
| /* We want to do something with it. */ |
| /* Check db_ref to make sure we can modify the packet. */ |
| if (mp->b_datap->db_ref > 1) { |
| mblk_t *first_mp1; |
| |
| first_mp1 = ip_copymsg(first_mp); |
| freemsg(first_mp); |
| if (!first_mp1) { |
| BUMP_MIB(&icmp_mib, icmpOutDrops); |
| return; |
| } |
| first_mp = first_mp1; |
| if (mctl_present) { |
| mp = first_mp->b_cont; |
| ASSERT(mp != NULL); |
| } else { |
| mp = first_mp; |
| } |
| ipha = (ipha_t *)mp->b_rptr; |
| icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; |
| wptr = (uchar_t *)icmph + ICMPH_SIZE; |
| } |
| switch (icmph->icmph_type) { |
| case ICMP_ADDRESS_MASK_REQUEST: |
| ipif = ipif_lookup_remote(ill, ipha->ipha_src, zoneid); |
| if (ipif == NULL) { |
| freemsg(first_mp); |
| return; |
| } |
| /* |
| * outging interface must be IPv4 |
| */ |
| ASSERT(ipif != NULL && !ipif->ipif_isv6); |
| icmph->icmph_type = ICMP_ADDRESS_MASK_REPLY; |
| bcopy(&ipif->ipif_net_mask, wptr, IP_ADDR_LEN); |
| ipif_refrele(ipif); |
| BUMP_MIB(&icmp_mib, icmpOutAddrMaskReps); |
| break; |
| case ICMP_ECHO_REQUEST: |
| icmph->icmph_type = ICMP_ECHO_REPLY; |
| BUMP_MIB(&icmp_mib, icmpOutEchoReps); |
| break; |
| case ICMP_TIME_STAMP_REQUEST: { |
| uint32_t *tsp; |
| |
| icmph->icmph_type = ICMP_TIME_STAMP_REPLY; |
| tsp = (uint32_t *)wptr; |
| tsp++; /* Skip past 'originate time' */ |
| /* Compute # of milliseconds since midnight */ |
| gethrestime(&now); |
| ts = (now.tv_sec % (24 * 60 * 60)) * 1000 + |
| now.tv_nsec / (NANOSEC / MILLISEC); |
| *tsp++ = htonl(ts); /* Lay in 'receive time' */ |
| *tsp++ = htonl(ts); /* Lay in 'send time' */ |
| BUMP_MIB(&icmp_mib, icmpOutTimestampReps); |
| break; |
| } |
| default: |
| ipha = (ipha_t *)&icmph[1]; |
| if ((uchar_t *)&ipha[1] > mp->b_wptr) { |
| if (!pullupmsg(mp, (uchar_t *)&ipha[1] - mp->b_rptr)) { |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| freemsg(first_mp); |
| return; |
| } |
| icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; |
| ipha = (ipha_t *)&icmph[1]; |
| } |
| if ((IPH_HDR_VERSION(ipha) != IPV4_VERSION)) { |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| freemsg(first_mp); |
| return; |
| } |
| hdr_length = IPH_HDR_LENGTH(ipha); |
| if (hdr_length < sizeof (ipha_t)) { |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| freemsg(first_mp); |
| return; |
| } |
| if ((uchar_t *)ipha + hdr_length > mp->b_wptr) { |
| if (!pullupmsg(mp, |
| (uchar_t *)ipha + hdr_length - mp->b_rptr)) { |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| freemsg(first_mp); |
| return; |
| } |
| icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; |
| ipha = (ipha_t *)&icmph[1]; |
| } |
| switch (icmph->icmph_type) { |
| case ICMP_REDIRECT: |
| /* |
| * As there is no upper client to deliver, we don't |
| * need the first_mp any more. |
| */ |
| if (mctl_present) { |
| freeb(first_mp); |
| } |
| icmp_redirect(mp); |
| return; |
| case ICMP_DEST_UNREACHABLE: |
| if (icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED) { |
| if (!icmp_inbound_too_big(icmph, ipha, ill, |
| zoneid, mp, iph_hdr_length)) { |
| freemsg(first_mp); |
| return; |
| } |
| /* |
| * icmp_inbound_too_big() may alter mp. |
| * Resynch ipha and icmph accordingly. |
| */ |
| icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; |
| ipha = (ipha_t *)&icmph[1]; |
| } |
| /* FALLTHRU */ |
| default : |
| /* |
| * IPQoS notes: Since we have already done IPQoS |
| * processing we don't want to do it again in |
| * the fanout routines called by |
| * icmp_inbound_error_fanout, hence the last |
| * argument, ip_policy, is B_FALSE. |
| */ |
| icmp_inbound_error_fanout(q, ill, first_mp, icmph, |
| ipha, iph_hdr_length, hdr_length, mctl_present, |
| B_FALSE, recv_ill, zoneid); |
| } |
| return; |
| } |
| /* Send out an ICMP packet */ |
| icmph->icmph_checksum = 0; |
| icmph->icmph_checksum = IP_CSUM(mp, iph_hdr_length, 0); |
| if (icmph->icmph_checksum == 0) |
| icmph->icmph_checksum = 0xFFFF; |
| if (broadcast || CLASSD(ipha->ipha_dst)) { |
| ipif_t *ipif_chosen; |
| /* |
| * Make it look like it was directed to us, so we don't look |
| * like a fool with a broadcast or multicast source address. |
| */ |
| ipif = ipif_lookup_remote(ill, ipha->ipha_src, zoneid); |
| /* |
| * Make sure that we haven't grabbed an interface that's DOWN. |
| */ |
| if (ipif != NULL) { |
| ipif_chosen = ipif_select_source(ipif->ipif_ill, |
| ipha->ipha_src, zoneid); |
| if (ipif_chosen != NULL) { |
| ipif_refrele(ipif); |
| ipif = ipif_chosen; |
| } |
| } |
| if (ipif == NULL) { |
| ip0dbg(("icmp_inbound: " |
| "No source for broadcast/multicast:\n" |
| "\tsrc 0x%x dst 0x%x ill %p " |
| "ipif_lcl_addr 0x%x\n", |
| ntohl(ipha->ipha_src), ntohl(ipha->ipha_dst), |
| (void *)ill, |
| ill->ill_ipif->ipif_lcl_addr)); |
| freemsg(first_mp); |
| return; |
| } |
| ASSERT(ipif != NULL && !ipif->ipif_isv6); |
| ipha->ipha_dst = ipif->ipif_src_addr; |
| ipif_refrele(ipif); |
| } |
| /* Reset time to live. */ |
| ipha->ipha_ttl = ip_def_ttl; |
| { |
| /* Swap source and destination addresses */ |
| ipaddr_t tmp; |
| |
| tmp = ipha->ipha_src; |
| ipha->ipha_src = ipha->ipha_dst; |
| ipha->ipha_dst = tmp; |
| } |
| ipha->ipha_ident = 0; |
| if (!IS_SIMPLE_IPH(ipha)) |
| icmp_options_update(ipha); |
| |
| /* |
| * ICMP echo replies should go out on the same interface |
| * the request came on as probes used by in.mpathd for detecting |
| * NIC failures are ECHO packets. We turn-off load spreading |
| * by setting ipsec_in_attach_if to B_TRUE, which is copied |
| * to ipsec_out_attach_if by ipsec_in_to_out called later in this |
| * function. This is in turn handled by ip_wput and ip_newroute |
| * to make sure that the packet goes out on the interface it came |
| * in on. If we don't turnoff load spreading, the packets might get |
| * dropped if there are no non-FAILED/INACTIVE interfaces for it |
| * to go out and in.mpathd would wrongly detect a failure or |
| * mis-detect a NIC failure for link failure. As load spreading |
| * can happen only if ill_group is not NULL, we do only for |
| * that case and this does not affect the normal case. |
| * |
| * We turn off load spreading only on echo packets that came from |
| * on-link hosts. If the interface route has been deleted, this will |
| * not be enforced as we can't do much. For off-link hosts, as the |
| * default routes in IPv4 does not typically have an ire_ipif |
| * pointer, we can't force MATCH_IRE_ILL in ip_wput/ip_newroute. |
| * Moreover, expecting a default route through this interface may |
| * not be correct. We use ipha_dst because of the swap above. |
| */ |
| onlink = B_FALSE; |
| if (icmph->icmph_type == ICMP_ECHO_REPLY && ill->ill_group != NULL) { |
| /* |
| * First, we need to make sure that it is not one of our |
| * local addresses. If we set onlink when it is one of |
| * our local addresses, we will end up creating IRE_CACHES |
| * for one of our local addresses. Then, we will never |
| * accept packets for them afterwards. |
| */ |
| src_ire = ire_ctable_lookup(ipha->ipha_dst, 0, IRE_LOCAL, |
| NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); |
| if (src_ire == NULL) { |
| ipif = ipif_get_next_ipif(NULL, ill); |
| if (ipif == NULL) { |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| freemsg(mp); |
| return; |
| } |
| src_ire = ire_ftable_lookup(ipha->ipha_dst, 0, 0, |
| IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, |
| NULL, MATCH_IRE_ILL | MATCH_IRE_TYPE); |
| ipif_refrele(ipif); |
| if (src_ire != NULL) { |
| onlink = B_TRUE; |
| ire_refrele(src_ire); |
| } |
| } else { |
| ire_refrele(src_ire); |
| } |
| } |
| if (!mctl_present) { |
| /* |
| * This packet should go out the same way as it |
| * came in i.e in clear. To make sure that global |
| * policy will not be applied to this in ip_wput_ire, |
| * we attach a IPSEC_IN mp and clear ipsec_in_secure. |
| */ |
| ASSERT(first_mp == mp); |
| if ((first_mp = ipsec_in_alloc(B_TRUE)) == NULL) { |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| freemsg(mp); |
| return; |
| } |
| ii = (ipsec_in_t *)first_mp->b_rptr; |
| |
| /* This is not a secure packet */ |
| ii->ipsec_in_secure = B_FALSE; |
| if (onlink) { |
| ii->ipsec_in_attach_if = B_TRUE; |
| ii->ipsec_in_ill_index = |
| ill->ill_phyint->phyint_ifindex; |
| ii->ipsec_in_rill_index = |
| recv_ill->ill_phyint->phyint_ifindex; |
| } |
| first_mp->b_cont = mp; |
| } else if (onlink) { |
| ii = (ipsec_in_t *)first_mp->b_rptr; |
| ii->ipsec_in_attach_if = B_TRUE; |
| ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; |
| ii->ipsec_in_rill_index = recv_ill->ill_phyint->phyint_ifindex; |
| } else { |
| ii = (ipsec_in_t *)first_mp->b_rptr; |
| } |
| ii->ipsec_in_zoneid = zoneid; |
| ASSERT(zoneid != ALL_ZONES); |
| if (!ipsec_in_to_out(first_mp, ipha, NULL)) { |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| return; |
| } |
| BUMP_MIB(&icmp_mib, icmpOutMsgs); |
| put(WR(q), first_mp); |
| } |
| |
| static ipaddr_t |
| icmp_get_nexthop_addr(ipha_t *ipha, ill_t *ill, zoneid_t zoneid, mblk_t *mp) |
| { |
| conn_t *connp; |
| connf_t *connfp; |
| ipaddr_t nexthop_addr = INADDR_ANY; |
| int hdr_length = IPH_HDR_LENGTH(ipha); |
| uint16_t *up; |
| uint32_t ports; |
| |
| up = (uint16_t *)((uchar_t *)ipha + hdr_length); |
| switch (ipha->ipha_protocol) { |
| case IPPROTO_TCP: |
| { |
| tcph_t *tcph; |
| |
| /* do a reverse lookup */ |
| tcph = (tcph_t *)((uchar_t *)ipha + hdr_length); |
| connp = ipcl_tcp_lookup_reversed_ipv4(ipha, tcph, |
| TCPS_LISTEN); |
| break; |
| } |
| case IPPROTO_UDP: |
| { |
| uint32_t dstport, srcport; |
| |
| ((uint16_t *)&ports)[0] = up[1]; |
| ((uint16_t *)&ports)[1] = up[0]; |
| |
| /* Extract ports in net byte order */ |
| dstport = htons(ntohl(ports) & 0xFFFF); |
| srcport = htons(ntohl(ports) >> 16); |
| |
| connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(dstport)]; |
| mutex_enter(&connfp->connf_lock); |
| connp = connfp->connf_head; |
| |
| /* do a reverse lookup */ |
| while ((connp != NULL) && |
| (!IPCL_UDP_MATCH(connp, dstport, |
| ipha->ipha_src, srcport, ipha->ipha_dst) || |
| !IPCL_ZONE_MATCH(connp, zoneid))) { |
| connp = connp->conn_next; |
| } |
| if (connp != NULL) |
| CONN_INC_REF(connp); |
| mutex_exit(&connfp->connf_lock); |
| break; |
| } |
| case IPPROTO_SCTP: |
| { |
| in6_addr_t map_src, map_dst; |
| |
| IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_src); |
| IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_dst); |
| ((uint16_t *)&ports)[0] = up[1]; |
| ((uint16_t *)&ports)[1] = up[0]; |
| |
| if ((connp = sctp_find_conn(&map_src, &map_dst, ports, |
| 0, zoneid)) == NULL) { |
| connp = ipcl_classify_raw(mp, IPPROTO_SCTP, |
| zoneid, ports, ipha); |
| } else { |
| CONN_INC_REF(connp); |
| SCTP_REFRELE(CONN2SCTP(connp)); |
| } |
| break; |
| } |
| default: |
| { |
| ipha_t ripha; |
| |
| ripha.ipha_src = ipha->ipha_dst; |
| ripha.ipha_dst = ipha->ipha_src; |
| ripha.ipha_protocol = ipha->ipha_protocol; |
| |
| connfp = &ipcl_proto_fanout[ipha->ipha_protocol]; |
| mutex_enter(&connfp->connf_lock); |
| connp = connfp->connf_head; |
| for (connp = connfp->connf_head; connp != NULL; |
| connp = connp->conn_next) { |
| if (IPCL_PROTO_MATCH(connp, |
| ipha->ipha_protocol, &ripha, ill, |
| 0, zoneid)) { |
| CONN_INC_REF(connp); |
| break; |
| } |
| } |
| mutex_exit(&connfp->connf_lock); |
| } |
| } |
| if (connp != NULL) { |
| if (connp->conn_nexthop_set) |
| nexthop_addr = connp->conn_nexthop_v4; |
| CONN_DEC_REF(connp); |
| } |
| return (nexthop_addr); |
| } |
| |
/*
 * MTU "plateau" values, taken from the table in RFC 1191, listed from
 * largest to smallest.
 */
static int icmp_frag_size_table[] = {
	32000, 17914, 8166, 4352, 2002,
	1496, 1006, 508, 296, 68
};
| |
| /* |
| * Process received ICMP Packet too big. |
| * After updating any IRE it does the fanout to any matching transport streams. |
| * Assumes the message has been pulled up till the IP header that caused |
| * the error. |
| * |
| * Returns B_FALSE on failure and B_TRUE on success. |
| */ |
| static boolean_t |
| icmp_inbound_too_big(icmph_t *icmph, ipha_t *ipha, ill_t *ill, |
| zoneid_t zoneid, mblk_t *mp, int iph_hdr_length) |
| { |
| ire_t *ire, *first_ire; |
| int mtu; |
| int hdr_length; |
| ipaddr_t nexthop_addr; |
| |
| ASSERT(icmph->icmph_type == ICMP_DEST_UNREACHABLE && |
| icmph->icmph_code == ICMP_FRAGMENTATION_NEEDED); |
| ASSERT(ill != NULL); |
| |
| hdr_length = IPH_HDR_LENGTH(ipha); |
| |
| /* Drop if the original packet contained a source route */ |
| if (ip_source_route_included(ipha)) { |
| return (B_FALSE); |
| } |
| /* |
| * Verify we have atleast ICMP_MIN_TP_HDR_LENGTH bytes of transport |
| * header. |
| */ |
| if ((uchar_t *)ipha + hdr_length + ICMP_MIN_TP_HDR_LEN > |
| mp->b_wptr) { |
| if (!pullupmsg(mp, (uchar_t *)ipha + hdr_length + |
| ICMP_MIN_TP_HDR_LEN - mp->b_rptr)) { |
| BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); |
| ip1dbg(("icmp_inbound_too_big: insufficient hdr\n")); |
| return (B_FALSE); |
| } |
| icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; |
| ipha = (ipha_t *)&icmph[1]; |
| } |
| nexthop_addr = icmp_get_nexthop_addr(ipha, ill, zoneid, mp); |
| if (nexthop_addr != INADDR_ANY) { |
| /* nexthop set */ |
| first_ire = ire_ctable_lookup(ipha->ipha_dst, |
| nexthop_addr, 0, NULL, ALL_ZONES, MBLK_GETLABEL(mp), |
| MATCH_IRE_MARK_PRIVATE_ADDR | MATCH_IRE_GW); |
| } else { |
| /* nexthop not set */ |
| first_ire = ire_ctable_lookup(ipha->ipha_dst, 0, IRE_CACHE, |
| NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE); |
| } |
| |
| if (!first_ire) { |
| ip1dbg(("icmp_inbound_too_big: no route for 0x%x\n", |
| ntohl(ipha->ipha_dst))); |
| return (B_FALSE); |
| } |
| /* Check for MTU discovery advice as described in RFC 1191 */ |
| mtu = ntohs(icmph->icmph_du_mtu); |
| rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); |
| for (ire = first_ire; ire != NULL && ire->ire_addr == ipha->ipha_dst; |
| ire = ire->ire_next) { |
| /* |
| * Look for the connection to which this ICMP message is |
| * directed. If it has the IP_NEXTHOP option set, then the |
| * search is limited to IREs with the MATCH_IRE_PRIVATE |
| * option. Else the search is limited to regular IREs. |
| */ |
| if (((ire->ire_marks & IRE_MARK_PRIVATE_ADDR) && |
| (nexthop_addr != ire->ire_gateway_addr)) || |
| (!(ire->ire_marks & IRE_MARK_PRIVATE_ADDR) && |
| (nexthop_addr != INADDR_ANY))) |
| continue; |
| |
| mutex_enter(&ire->ire_lock); |
| if (icmph->icmph_du_zero == 0 && mtu > 68) { |
| /* Reduce the IRE max frag value as advised. */ |
| ip1dbg(("Received mtu from router: %d (was %d)\n", |
| mtu, ire->ire_max_frag)); |
| ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); |
| } else { |
| uint32_t length; |
| int i; |
| |
| /* |
| * Use the table from RFC 1191 to figure out |
| * the next "plateau" based on the length in |
| * the original IP packet. |
| */ |
| length = ntohs(ipha->ipha_length); |
| if (ire->ire_max_frag <= length && |
| ire->ire_max_frag >= length - hdr_length) { |
| /* |
| * Handle broken BSD 4.2 systems that |
| * return the wrong iph_length in ICMP |
| * errors. |
| */ |
| ip1dbg(("Wrong mtu: sent %d, ire %d\n", |
| length, ire->ire_max_frag)); |
| length -= hdr_length; |
| } |
| for (i = 0; i < A_CNT(icmp_frag_size_table); i++) { |
| if (length > icmp_frag_size_table[i]) |
| break; |
| } |
| if (i == A_CNT(icmp_frag_size_table)) { |
| /* Smaller than 68! */ |
| ip1dbg(("Too big for packet size %d\n", |
| length)); |
| ire->ire_max_frag = MIN(ire->ire_max_frag, 576); |
| ire->ire_frag_flag = 0; |
| } else { |
| mtu = icmp_frag_size_table[i]; |
| ip1dbg(("Calculated mtu %d, packet size %d, " |
| "before %d", mtu, length, |
| ire->ire_max_frag)); |
| ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); |
| ip1dbg((", after %d\n", ire->ire_max_frag)); |
| } |
| /* Record the new max frag size for the ULP. */ |
| icmph->icmph_du_zero = 0; |
| icmph->icmph_du_mtu = |
| htons((uint16_t)ire->ire_max_frag); |
| } |
| mutex_exit(&ire->ire_lock); |
| } |
| rw_exit(&first_ire->ire_bucket->irb_lock); |
| ire_refrele(first_ire); |
| return (B_TRUE); |
| } |
| |
| /* |
| * If the packet in error is Self-Encapsulated, icmp_inbound_error_fanout |
| * calls this function. |
| */ |
| static mblk_t * |
| icmp_inbound_self_encap_error(mblk_t *mp, int iph_hdr_length, int hdr_length) |
| { |
| ipha_t *ipha; |
| icmph_t *icmph; |
| ipha_t *in_ipha; |
| int length; |
| |
| ASSERT(mp->b_datap->db_type == M_DATA); |
| |
| /* |
| * For Self-encapsulated packets, we added an extra IP header |
| * without the options. Inner IP header is the one from which |
| * the outer IP header was formed. Thus, we need to remove the |
| * outer IP header. To do this, we pullup the whole message |
| * and overlay whatever follows the outer IP header over the |
| * outer IP header. |
| */ |
| |
| if (!pullupmsg(mp, -1)) |
| return (NULL); |
| |
| icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; |
| ipha = (ipha_t *)&icmph[1]; |
| in_ipha = (ipha_t *)((uchar_t *)ipha + hdr_length); |
| |
| /* |
| * The length that we want to overlay is following the inner |
| * IP header. Subtracting the IP header + icmp header + outer |
| * IP header's length should give us the length that we want to |
| * overlay. |
| */ |
| length = msgdsize(mp) - iph_hdr_length - sizeof (icmph_t) - |
| hdr_length; |
| /* |
| * Overlay whatever follows the inner header over the |
| * outer header. |
| */ |
| bcopy((uchar_t *)in_ipha, (uchar_t *)ipha, length); |
| |
| /* Set the wptr to account for the outer header */ |
| mp->b_wptr -= hdr_length; |
| return (mp); |
| } |
| |
| /* |
| * Try to pass the ICMP message upstream in case the ULP cares. |
| * |
| * If the packet that caused the ICMP error is secure, we send |
| * it to AH/ESP to make sure that the attached packet has a |
| * valid association. ipha in the code below points to the |
| * IP header of the packet that caused the error. |
| * |
| * We handle ICMP_FRAGMENTATION_NEEDED(IFN) message differently |
| * in the context of IPSEC. Normally, whenever we send the ire to |
| * the upper layer (including ip_bind), we also tell it the IPSEC |
| * header length in ire_ipsec_overhead. TCP can deduce the MSS as it |
| * has both the MTU (ire_max_frag) and the ire_ipsec_overhead. |
| * Similarly, we pass the new MTU icmph_du_mtu and TCP does the |
| * same thing. As TCP has the IPSEC options size that needs to be |
| * adjusted, we just pass the MTU unchanged. |
| * |
| * IFN could have been generated locally or by some router. |
| * |
| * LOCAL : *ip_wput_ire -> icmp_frag_needed could have generated this. |
| * This happens because IP adjusted its value of MTU on an |
| * earlier IFN message and could not tell the upper layer |
| * the new adjusted value of MTU, e.g. the packet was encrypted |
| * or there was not enough information to fanout to upper |
| * layers. Thus on the next outbound datagram, ip_wput_ire |
| * generates the IFN, where IPSEC processing has *not* been |
| * done. |
| * |
| * *ip_wput_ire_fragmentit -> ip_wput_frag -> icmp_frag_needed |
| * could have generated this. This happens because ire_max_frag |
| * value in IP was set to a new value, while the IPSEC processing |
| * was being done and after we made the fragmentation check in |
| * ip_wput_ire. Thus on return from IPSEC processing, |
| * ip_wput_ipsec_out finds that the new length is > ire_max_frag |
| * and generates the IFN. As IPSEC processing is over, we fanout |
| * to AH/ESP to remove the header. |
| * |
| * In both these cases, ipsec_in_loopback will be set indicating |
| * that IFN was generated locally. |
| * |
| * ROUTER : IFN could be secure or non-secure. |
| * |
| * * SECURE : We use the IPSEC_IN to fanout to AH/ESP if the |
| * packet in error has AH/ESP headers to validate the AH/ESP |
| * headers. AH/ESP will verify whether there is a valid SA or |
| * not and send it back. We will fanout again if we have more |
| * data in the packet. |
| * |
| * If the packet in error does not have AH/ESP, we handle it |
| * like any other case. |
| * |
| * * NON_SECURE : If the packet in error has AH/ESP headers, |
| * we attach a dummy ipsec_in and send it up to AH/ESP |
| * for validation. AH/ESP will verify whether there is a |
| * valid SA or not and send it back. We will fanout again if |
| * we have more data in the packet. |
| * |
| * If the packet in error does not have AH/ESP, we handle it |
| * like any other case. |
| */ |
| static void |
| icmp_inbound_error_fanout(queue_t *q, ill_t *ill, mblk_t *mp, |
| icmph_t *icmph, ipha_t *ipha, int iph_hdr_length, int hdr_length, |
| boolean_t mctl_present, boolean_t ip_policy, ill_t *recv_ill, |
| zoneid_t zoneid) |
| { |
| uint16_t *up; /* Pointer to ports in ULP header */ |
| uint32_t ports; /* reversed ports for fanout */ |
| ipha_t ripha; /* With reversed addresses */ |
| mblk_t *first_mp; |
| ipsec_in_t *ii; |
| tcph_t *tcph; |
| conn_t *connp; |
| |
| ASSERT(ill != NULL); |
| |
| first_mp = mp; |
| if (mctl_present) { |
| mp = first_mp->b_cont; |
| ASSERT(mp != NULL); |
| |
| ii = (ipsec_in_t *)first_mp->b_rptr; |
| ASSERT(ii->ipsec_in_type == IPSEC_IN); |
| } else { |
| ii = NULL; |
| } |
| |
| switch (ipha->ipha_protocol) { |
| case IPPROTO_UDP: |
| /* |
| * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of |
| * transport header. |
| */ |
| if ((uchar_t *)ipha + hdr_length + ICMP_MIN_TP_HDR_LEN > |
| mp->b_wptr) { |
| if (!pullupmsg(mp, (uchar_t *)ipha + hdr_length + |
| ICMP_MIN_TP_HDR_LEN - mp->b_rptr)) { |
| goto discard_pkt; |
| } |
| icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; |
| ipha = (ipha_t *)&icmph[1]; |
| } |
| up = (uint16_t *)((uchar_t *)ipha + hdr_length); |
| |
| /* |
| * Attempt to find a client stream based on port. |
| * Note that we do a reverse lookup since the header is |
| * in the form we sent it out. |
| * The ripha header is only used for the IP_UDP_MATCH and we |
| * only set the src and dst addresses and protocol. |
| */ |
| ripha.ipha_src = ipha->ipha_dst; |
| ripha.ipha_dst = ipha->ipha_src; |
| ripha.ipha_protocol = ipha->ipha_protocol; |
| ((uint16_t *)&ports)[0] = up[1]; |
| ((uint16_t *)&ports)[1] = up[0]; |
| ip2dbg(("icmp_inbound_error: UDP %x:%d to %x:%d: %d/%d\n", |
| ntohl(ipha->ipha_src), ntohs(up[0]), |
| ntohl(ipha->ipha_dst), ntohs(up[1]), |
| icmph->icmph_type, icmph->icmph_code)); |
| |
| /* Have to change db_type after any pullupmsg */ |
| DB_TYPE(mp) = M_CTL; |
| |
| ip_fanout_udp(q, first_mp, ill, &ripha, ports, B_FALSE, 0, |
| mctl_present, ip_policy, recv_ill, zoneid); |
| return; |
| |
| case IPPROTO_TCP: |
| /* |
| * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of |
| * transport header. |
| */ |
| if ((uchar_t *)ipha + hdr_length + ICMP_MIN_TP_HDR_LEN > |
| mp->b_wptr) { |
| if (!pullupmsg(mp, (uchar_t *)ipha + hdr_length + |
| ICMP_MIN_TP_HDR_LEN - mp->b_rptr)) { |
|