diff --git a/.mailmap b/.mailmap index 13e4f504e17fbb7f91d6f87ae5589b989157ee73..bac1e6664a18ab860cbc135d988239c2d6f12391 100644 --- a/.mailmap +++ b/.mailmap @@ -135,6 +135,8 @@ Frank Rowand Frank Zago Gao Xiang Gao Xiang +Gao Xiang +Gao Xiang Gerald Schaefer Gerald Schaefer Gerald Schaefer @@ -371,6 +373,7 @@ Sean Nyekjaer Sebastian Reichel Sebastian Reichel Sedat Dilek +Seth Forshee Shiraz Hashim Shuah Khan Shuah Khan diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f2d26cb7e85391028376424b9daac448df69c015..c0fdb04a0435a6daf88492cbaf76adfb6aeac716 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5796,6 +5796,24 @@ expediting. Set to zero to disable automatic expediting. + srcutree.srcu_max_nodelay [KNL] + Specifies the number of no-delay instances + per jiffy for which the SRCU grace period + worker thread will be rescheduled with zero + delay. Beyond this limit, worker thread will + be rescheduled with a sleep delay of one jiffy. + + srcutree.srcu_max_nodelay_phase [KNL] + Specifies the per-grace-period phase, number of + non-sleeping polls of readers. Beyond this limit, + grace period worker thread will be rescheduled + with a sleep delay of one jiffy, between each + rescan of the readers, for a grace period phase. + + srcutree.srcu_retry_check_delay [KNL] + Specifies number of microseconds of non-sleeping + delay between each non-sleeping poll of readers. + srcutree.small_contention_lim [KNL] Specifies the number of update-side contention events per jiffy will be tolerated before diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst index ed7fa76e7a404342f968c6123258b98d80532aec..d742ba6bd21195de7ba1e04aab6cc915f8f333cc 100644 --- a/Documentation/networking/dsa/dsa.rst +++ b/Documentation/networking/dsa/dsa.rst @@ -503,26 +503,108 @@ per-port PHY specific details: interface connection, MDIO bus location, etc. Driver development ================== -DSA switch drivers need to implement a dsa_switch_ops structure which will +DSA switch drivers need to implement a ``dsa_switch_ops`` structure which will contain the various members described below. -``register_switch_driver()`` registers this dsa_switch_ops in its internal list -of drivers to probe for. ``unregister_switch_driver()`` does the exact opposite. +Probing, registration and device lifetime +----------------------------------------- -Unless requested differently by setting the priv_size member accordingly, DSA -does not allocate any driver private context space. +DSA switches are regular ``device`` structures on buses (be they platform, SPI, +I2C, MDIO or otherwise). The DSA framework is not involved in their probing +with the device core. + +Switch registration from the perspective of a driver means passing a valid +``struct dsa_switch`` pointer to ``dsa_register_switch()``, usually from the +switch driver's probing function. The following members must be valid in the +provided structure: + +- ``ds->dev``: will be used to parse the switch's OF node or platform data. + +- ``ds->num_ports``: will be used to create the port list for this switch, and + to validate the port indices provided in the OF node. + +- ``ds->ops``: a pointer to the ``dsa_switch_ops`` structure holding the DSA + method implementations. + +- ``ds->priv``: backpointer to a driver-private data structure which can be + retrieved in all further DSA method callbacks. + +In addition, the following flags in the ``dsa_switch`` structure may optionally +be configured to obtain driver-specific behavior from the DSA core. Their +behavior when set is documented through comments in ``include/net/dsa.h``. + +- ``ds->vlan_filtering_is_global`` + +- ``ds->needs_standalone_vlan_filtering`` + +- ``ds->configure_vlan_while_not_filtering`` + +- ``ds->untag_bridge_pvid`` + +- ``ds->assisted_learning_on_cpu_port`` + +- ``ds->mtu_enforcement_ingress`` + +- ``ds->fdb_isolation`` + +Internally, DSA keeps an array of switch trees (group of switches) global to +the kernel, and attaches a ``dsa_switch`` structure to a tree on registration. +The tree ID to which the switch is attached is determined by the first u32 +number of the ``dsa,member`` property of the switch's OF node (0 if missing). +The switch ID within the tree is determined by the second u32 number of the +same OF property (0 if missing). Registering multiple switches with the same +switch ID and tree ID is illegal and will cause an error. Using platform data, +a single switch and a single switch tree is permitted. + +In case of a tree with multiple switches, probing takes place asymmetrically. +The first N-1 callers of ``dsa_register_switch()`` only add their ports to the +port list of the tree (``dst->ports``), each port having a backpointer to its +associated switch (``dp->ds``). Then, these switches exit their +``dsa_register_switch()`` call early, because ``dsa_tree_setup_routing_table()`` +has determined that the tree is not yet complete (not all ports referenced by +DSA links are present in the tree's port list). The tree becomes complete when +the last switch calls ``dsa_register_switch()``, and this triggers the effective +continuation of initialization (including the call to ``ds->ops->setup()``) for +all switches within that tree, all as part of the calling context of the last +switch's probe function. + +The opposite of registration takes place when calling ``dsa_unregister_switch()``, +which removes a switch's ports from the port list of the tree. The entire tree +is torn down when the first switch unregisters. + +It is mandatory for DSA switch drivers to implement the ``shutdown()`` callback +of their respective bus, and call ``dsa_switch_shutdown()`` from it (a minimal +version of the full teardown performed by ``dsa_unregister_switch()``). +The reason is that DSA keeps a reference on the master net device, and if the +driver for the master device decides to unbind on shutdown, DSA's reference +will block that operation from finalizing. + +Either ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` must be called, +but not both, and the device driver model permits the bus' ``remove()`` method +to be called even if ``shutdown()`` was already called. Therefore, drivers are +expected to implement a mutual exclusion method between ``remove()`` and +``shutdown()`` by setting their drvdata to NULL after any of these has run, and +checking whether the drvdata is NULL before proceeding to take any action. + +After ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` was called, no +further callbacks via the provided ``dsa_switch_ops`` may take place, and the +driver may free the data structures associated with the ``dsa_switch``. Switch configuration -------------------- -- ``tag_protocol``: this is to indicate what kind of tagging protocol is supported, - should be a valid value from the ``dsa_tag_protocol`` enum +- ``get_tag_protocol``: this is to indicate what kind of tagging protocol is + supported, should be a valid value from the ``dsa_tag_protocol`` enum. + The returned information does not have to be static; the driver is passed the + CPU port number, as well as the tagging protocol of a possibly stacked + upstream switch, in case there are hardware limitations in terms of supported + tag formats. -- ``probe``: probe routine which will be invoked by the DSA platform device upon - registration to test for the presence/absence of a switch device. For MDIO - devices, it is recommended to issue a read towards internal registers using - the switch pseudo-PHY and return whether this is a supported device. For other - buses, return a non-NULL string +- ``change_tag_protocol``: when the default tagging protocol has compatibility + problems with the master or other issues, the driver may support changing it + at runtime, either through a device tree property or through sysfs. In that + case, further calls to ``get_tag_protocol`` should report the protocol in + current use. - ``setup``: setup function for the switch, this function is responsible for setting up the ``dsa_switch_ops`` private structure with all it needs: register maps, @@ -535,7 +617,17 @@ Switch configuration fully configured and ready to serve any kind of request. It is recommended to issue a software reset of the switch during this setup function in order to avoid relying on what a previous software agent such as a bootloader/firmware - may have previously configured. + may have previously configured. The method responsible for undoing any + applicable allocations or operations done here is ``teardown``. + +- ``port_setup`` and ``port_teardown``: methods for initialization and + destruction of per-port data structures. It is mandatory for some operations + such as registering and unregistering devlink port regions to be done from + these methods, otherwise they are optional. A port will be torn down only if + it has been previously set up. It is possible for a port to be set up during + probing only to be torn down immediately afterwards, for example in case its + PHY cannot be found. In this case, probing of the DSA switch continues + without that particular port. PHY devices and link management ------------------------------- @@ -635,26 +727,198 @@ Power management ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is disabled while being a bridge member +Address databases +----------------- + +Switching hardware is expected to have a table for FDB entries, however not all +of them are active at the same time. An address database is the subset (partition) +of FDB entries that is active (can be matched by address learning on RX, or FDB +lookup on TX) depending on the state of the port. An address database may +occasionally be called "FID" (Filtering ID) in this document, although the +underlying implementation may choose whatever is available to the hardware. + +For example, all ports that belong to a VLAN-unaware bridge (which is +*currently* VLAN-unaware) are expected to learn source addresses in the +database associated by the driver with that bridge (and not with other +VLAN-unaware bridges). During forwarding and FDB lookup, a packet received on a +VLAN-unaware bridge port should be able to find a VLAN-unaware FDB entry having +the same MAC DA as the packet, which is present on another port member of the +same bridge. At the same time, the FDB lookup process must be able to not find +an FDB entry having the same MAC DA as the packet, if that entry points towards +a port which is a member of a different VLAN-unaware bridge (and is therefore +associated with a different address database). + +Similarly, each VLAN of each offloaded VLAN-aware bridge should have an +associated address database, which is shared by all ports which are members of +that VLAN, but not shared by ports belonging to different bridges that are +members of the same VID. + +In this context, a VLAN-unaware database means that all packets are expected to +match on it irrespective of VLAN ID (only MAC address lookup), whereas a +VLAN-aware database means that packets are supposed to match based on the VLAN +ID from the classified 802.1Q header (or the pvid if untagged). + +At the bridge layer, VLAN-unaware FDB entries have the special VID value of 0, +whereas VLAN-aware FDB entries have non-zero VID values. Note that a +VLAN-unaware bridge may have VLAN-aware (non-zero VID) FDB entries, and a +VLAN-aware bridge may have VLAN-unaware FDB entries. As in hardware, the +software bridge keeps separate address databases, and offloads to hardware the +FDB entries belonging to these databases, through switchdev, asynchronously +relative to the moment when the databases become active or inactive. + +When a user port operates in standalone mode, its driver should configure it to +use a separate database called a port private database. This is different from +the databases described above, and should impede operation as standalone port +(packet in, packet out to the CPU port) as little as possible. For example, +on ingress, it should not attempt to learn the MAC SA of ingress traffic, since +learning is a bridging layer service and this is a standalone port, therefore +it would consume useless space. With no address learning, the port private +database should be empty in a naive implementation, and in this case, all +received packets should be trivially flooded to the CPU port. + +DSA (cascade) and CPU ports are also called "shared" ports because they service +multiple address databases, and the database that a packet should be associated +to is usually embedded in the DSA tag. This means that the CPU port may +simultaneously transport packets coming from a standalone port (which were +classified by hardware in one address database), and from a bridge port (which +were classified to a different address database). + +Switch drivers which satisfy certain criteria are able to optimize the naive +configuration by removing the CPU port from the flooding domain of the switch, +and just program the hardware with FDB entries pointing towards the CPU port +for which it is known that software is interested in those MAC addresses. +Packets which do not match a known FDB entry will not be delivered to the CPU, +which will save CPU cycles required for creating an skb just to drop it. + +DSA is able to perform host address filtering for the following kinds of +addresses: + +- Primary unicast MAC addresses of ports (``dev->dev_addr``). These are + associated with the port private database of the respective user port, + and the driver is notified to install them through ``port_fdb_add`` towards + the CPU port. + +- Secondary unicast and multicast MAC addresses of ports (addresses added + through ``dev_uc_add()`` and ``dev_mc_add()``). These are also associated + with the port private database of the respective user port. + +- Local/permanent bridge FDB entries (``BR_FDB_LOCAL``). These are the MAC + addresses of the bridge ports, for which packets must be terminated locally + and not forwarded. They are associated with the address database for that + bridge. + +- Static bridge FDB entries installed towards foreign (non-DSA) interfaces + present in the same bridge as some DSA switch ports. These are also + associated with the address database for that bridge. + +- Dynamically learned FDB entries on foreign interfaces present in the same + bridge as some DSA switch ports, only if ``ds->assisted_learning_on_cpu_port`` + is set to true by the driver. These are associated with the address database + for that bridge. + +For various operations detailed below, DSA provides a ``dsa_db`` structure +which can be of the following types: + +- ``DSA_DB_PORT``: the FDB (or MDB) entry to be installed or deleted belongs to + the port private database of user port ``db->dp``. +- ``DSA_DB_BRIDGE``: the entry belongs to one of the address databases of bridge + ``db->bridge``. Separation between the VLAN-unaware database and the per-VID + databases of this bridge is expected to be done by the driver. +- ``DSA_DB_LAG``: the entry belongs to the address database of LAG ``db->lag``. + Note: ``DSA_DB_LAG`` is currently unused and may be removed in the future. + +The drivers which act upon the ``dsa_db`` argument in ``port_fdb_add``, +``port_mdb_add`` etc should declare ``ds->fdb_isolation`` as true. + +DSA associates each offloaded bridge and each offloaded LAG with a one-based ID +(``struct dsa_bridge :: num``, ``struct dsa_lag :: id``) for the purposes of +refcounting addresses on shared ports. Drivers may piggyback on DSA's numbering +scheme (the ID is readable through ``db->bridge.num`` and ``db->lag.id`` or may +implement their own. + +Only the drivers which declare support for FDB isolation are notified of FDB +entries on the CPU port belonging to ``DSA_DB_PORT`` databases. +For compatibility/legacy reasons, ``DSA_DB_BRIDGE`` addresses are notified to +drivers even if they do not support FDB isolation. However, ``db->bridge.num`` +and ``db->lag.id`` are always set to 0 in that case (to denote the lack of +isolation, for refcounting purposes). + +Note that it is not mandatory for a switch driver to implement physically +separate address databases for each standalone user port. Since FDB entries in +the port private databases will always point to the CPU port, there is no risk +for incorrect forwarding decisions. In this case, all standalone ports may +share the same database, but the reference counting of host-filtered addresses +(not deleting the FDB entry for a port's MAC address if it's still in use by +another port) becomes the responsibility of the driver, because DSA is unaware +that the port databases are in fact shared. This can be achieved by calling +``dsa_fdb_present_in_other_db()`` and ``dsa_mdb_present_in_other_db()``. +The down side is that the RX filtering lists of each user port are in fact +shared, which means that user port A may accept a packet with a MAC DA it +shouldn't have, only because that MAC address was in the RX filtering list of +user port B. These packets will still be dropped in software, however. + Bridge layer ------------ +Offloading the bridge forwarding plane is optional and handled by the methods +below. They may be absent, return -EOPNOTSUPP, or ``ds->max_num_bridges`` may +be non-zero and exceeded, and in this case, joining a bridge port is still +possible, but the packet forwarding will take place in software, and the ports +under a software bridge must remain configured in the same way as for +standalone operation, i.e. have all bridging service functions (address +learning etc) disabled, and send all received packets to the CPU port only. + +Concretely, a port starts offloading the forwarding plane of a bridge once it +returns success to the ``port_bridge_join`` method, and stops doing so after +``port_bridge_leave`` has been called. Offloading the bridge means autonomously +learning FDB entries in accordance with the software bridge port's state, and +autonomously forwarding (or flooding) received packets without CPU intervention. +This is optional even when offloading a bridge port. Tagging protocol drivers +are expected to call ``dsa_default_offload_fwd_mark(skb)`` for packets which +have already been autonomously forwarded in the forwarding domain of the +ingress switch port. DSA, through ``dsa_port_devlink_setup()``, considers all +switch ports part of the same tree ID to be part of the same bridge forwarding +domain (capable of autonomous forwarding to each other). + +Offloading the TX forwarding process of a bridge is a distinct concept from +simply offloading its forwarding plane, and refers to the ability of certain +driver and tag protocol combinations to transmit a single skb coming from the +bridge device's transmit function to potentially multiple egress ports (and +thereby avoid its cloning in software). + +Packets for which the bridge requests this behavior are called data plane +packets and have ``skb->offload_fwd_mark`` set to true in the tag protocol +driver's ``xmit`` function. Data plane packets are subject to FDB lookup, +hardware learning on the CPU port, and do not override the port STP state. +Additionally, replication of data plane packets (multicast, flooding) is +handled in hardware and the bridge driver will transmit a single skb for each +packet that may or may not need replication. + +When the TX forwarding offload is enabled, the tag protocol driver is +responsible to inject packets into the data plane of the hardware towards the +correct bridging domain (FID) that the port is a part of. The port may be +VLAN-unaware, and in this case the FID must be equal to the FID used by the +driver for its VLAN-unaware address database associated with that bridge. +Alternatively, the bridge may be VLAN-aware, and in that case, it is guaranteed +that the packet is also VLAN-tagged with the VLAN ID that the bridge processed +this packet in. It is the responsibility of the hardware to untag the VID on +the egress-untagged ports, or keep the tag on the egress-tagged ones. + - ``port_bridge_join``: bridge layer function invoked when a given switch port is added to a bridge, this function should do what's necessary at the switch level to permit the joining port to be added to the relevant logical domain for it to ingress/egress traffic with other members of the bridge. + By setting the ``tx_fwd_offload`` argument to true, the TX forwarding process + of this bridge is also offloaded. - ``port_bridge_leave``: bridge layer function invoked when a given switch port is removed from a bridge, this function should do what's necessary at the switch level to deny the leaving port from ingress/egress traffic from the - remaining bridge members. When the port leaves the bridge, it should be aged - out at the switch hardware for the switch to (re) learn MAC addresses behind - this port. + remaining bridge members. - ``port_stp_state_set``: bridge layer function invoked when a given switch port STP state is computed by the bridge layer and should be propagated to switch - hardware to forward/block/learn traffic. The switch driver is responsible for - computing a STP state change based on current and asked parameters and perform - the relevant ageing based on the intersection results + hardware to forward/block/learn traffic. - ``port_bridge_flags``: bridge layer function invoked when a port must configure its settings for e.g. flooding of unknown traffic or source address @@ -667,21 +931,11 @@ Bridge layer CPU port, and flooding towards the CPU port should also be enabled, due to a lack of an explicit address filtering mechanism in the DSA core. -- ``port_bridge_tx_fwd_offload``: bridge layer function invoked after - ``port_bridge_join`` when a driver sets ``ds->num_fwd_offloading_bridges`` to - a non-zero value. Returning success in this function activates the TX - forwarding offload bridge feature for this port, which enables the tagging - protocol driver to inject data plane packets towards the bridging domain that - the port is a part of. Data plane packets are subject to FDB lookup, hardware - learning on the CPU port, and do not override the port STP state. - Additionally, replication of data plane packets (multicast, flooding) is - handled in hardware and the bridge driver will transmit a single skb for each - packet that needs replication. The method is provided as a configuration - point for drivers that need to configure the hardware for enabling this - feature. - -- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver - leaves a bridge port which had the TX forwarding offload feature enabled. +- ``port_fast_age``: bridge layer function invoked when flushing the + dynamically learned FDB entries on the port is necessary. This is called when + transitioning from an STP state where learning should take place to an STP + state where it shouldn't, or when leaving a bridge, or when address learning + is turned off via ``port_bridge_flags``. Bridge VLAN filtering --------------------- @@ -697,55 +951,44 @@ Bridge VLAN filtering allowed. - ``port_vlan_add``: bridge layer function invoked when a VLAN is configured - (tagged or untagged) for the given switch port. If the operation is not - supported by the hardware, this function should return ``-EOPNOTSUPP`` to - inform the bridge code to fallback to a software implementation. + (tagged or untagged) for the given switch port. The CPU port becomes a member + of a VLAN only if a foreign bridge port is also a member of it (and + forwarding needs to take place in software), or the VLAN is installed to the + VLAN group of the bridge device itself, for termination purposes + (``bridge vlan add dev br0 vid 100 self``). VLANs on shared ports are + reference counted and removed when there is no user left. Drivers do not need + to manually install a VLAN on the CPU port. - ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the given switch port -- ``port_vlan_dump``: bridge layer function invoked with a switchdev callback - function that the driver has to call for each VLAN the given port is a member - of. A switchdev object is used to carry the VID and bridge flags. - - ``port_fdb_add``: bridge layer function invoked when the bridge wants to install a Forwarding Database entry, the switch hardware should be programmed with the specified address in the specified VLAN Id in the forwarding database - associated with this VLAN ID. If the operation is not supported, this - function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to - a software implementation. - -.. note:: VLAN ID 0 corresponds to the port private database, which, in the context - of DSA, would be its port-based VLAN, used by the associated bridge device. + associated with this VLAN ID. - ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a Forwarding Database entry, the switch hardware should be programmed to delete the specified MAC address from the specified VLAN ID if it was mapped into this port forwarding database -- ``port_fdb_dump``: bridge layer function invoked with a switchdev callback - function that the driver has to call for each MAC address known to be behind - the given port. A switchdev object is used to carry the VID and FDB info. +- ``port_fdb_dump``: bridge bypass function invoked by ``ndo_fdb_dump`` on the + physical DSA port interfaces. Since DSA does not attempt to keep in sync its + hardware FDB entries with the software bridge, this method is implemented as + a means to view the entries visible on user ports in the hardware database. + The entries reported by this function have the ``self`` flag in the output of + the ``bridge fdb show`` command. - ``port_mdb_add``: bridge layer function invoked when the bridge wants to install - a multicast database entry. If the operation is not supported, this function - should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to a - software implementation. The switch hardware should be programmed with the + a multicast database entry. The switch hardware should be programmed with the specified address in the specified VLAN ID in the forwarding database associated with this VLAN ID. -.. note:: VLAN ID 0 corresponds to the port private database, which, in the context - of DSA, would be its port-based VLAN, used by the associated bridge device. - - ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a multicast database entry, the switch hardware should be programmed to delete the specified MAC address from the specified VLAN ID if it was mapped into this port forwarding database. -- ``port_mdb_dump``: bridge layer function invoked with a switchdev callback - function that the driver has to call for each MAC address known to be behind - the given port. A switchdev object is used to carry the VID and MDB info. - Link aggregation ---------------- diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b3a534ed0e7c5fa1de2f847a67c8308f8b8342cd..66c72230eaade96599bbfff2914b98e9011a3df8 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1052,11 +1052,7 @@ udp_rmem_min - INTEGER Default: 4K udp_wmem_min - INTEGER - Minimal size of send buffer used by UDP sockets in moderation. - Each UDP socket is able to use the size for sending data, even if - total pages of UDP sockets exceed udp_mem pressure. The unit is byte. - - Default: 4K + UDP does not have tx memory accounting and this tunable has no effect. RAW variables ============= diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 6e090fb96a0edb4d83c386b01f8a839056cc9206..98a2839303071188de011d79a05d3d226a0125f9 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5658,7 +5658,7 @@ by a string of size ``name_size``. #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) - #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES + #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN #define KVM_STATS_BASE_SHIFT 8 #define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) diff --git a/MAINTAINERS b/MAINTAINERS index 651616ed8ae25e8e790915721d86c6404cb171fa..64379c699903bc022dc7656eb02dadc8a18818b1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15849,7 +15849,7 @@ PIN CONTROLLER - FREESCALE M: Dong Aisheng M: Fabio Estevam M: Shawn Guo -M: Stefan Agner +M: Jacky Bai R: Pengutronix Kernel Team L: linux-gpio@vger.kernel.org S: Maintained diff --git a/Makefile b/Makefile index 00fd80c5dd6e65bd332632dbc1ebb3c35015129f..b79c1c18149d38ff8798df248d73ec819c8a8dfe 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Superb Owl # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index fcf9a41a4ef5b15efaac04bc85c420a435603fb8..71b9272acb28bf0e4f23226954c1d2a3be1fd256 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -438,6 +438,13 @@ config MMU_GATHER_PAGE_SIZE config MMU_GATHER_NO_RANGE bool + select MMU_GATHER_MERGE_VMAS + +config MMU_GATHER_NO_FLUSH_CACHE + bool + +config MMU_GATHER_MERGE_VMAS + bool config MMU_GATHER_NO_GATHER bool diff --git a/arch/csky/include/asm/tlb.h b/arch/csky/include/asm/tlb.h index 3498e65f59f8eb15e45b24d8dfa9844d4b4cf940..702861c6887486adf5166646541a7e626dd2cbe6 100644 --- a/arch/csky/include/asm/tlb.h +++ b/arch/csky/include/asm/tlb.h @@ -4,21 +4,6 @@ #define __ASM_CSKY_TLB_H #include - -#define tlb_start_vma(tlb, vma) \ - do { \ - if (!(tlb)->fullmm) \ - flush_cache_range(vma, (vma)->vm_start, (vma)->vm_end); \ - } while (0) - -#define tlb_end_vma(tlb, vma) \ - do { \ - if (!(tlb)->fullmm) \ - flush_tlb_range(vma, (vma)->vm_start, (vma)->vm_end); \ - } while (0) - -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include #endif /* __ASM_CSKY_TLB_H */ diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 53a912befb626b53bfb866fd63d8b6f62bd25c26..b57daee98b89c0988f200b7d0217350d939fb1f2 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -108,6 +108,7 @@ config LOONGARCH select TRACE_IRQFLAGS_SUPPORT select USE_PERCPU_NUMA_NODE_ID select ZONE_DMA32 + select MMU_GATHER_MERGE_VMAS if MMU config 32BIT bool diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h index 4f629ae9d5a9d0609d63e9140bdef3acb75862d6..dd24f5898f651cf9e264c9d6bf23fc8e3190e3ac 100644 --- a/arch/loongarch/include/asm/tlb.h +++ b/arch/loongarch/include/asm/tlb.h @@ -137,16 +137,6 @@ static inline void invtlb_all(u32 op, u32 info, u64 addr) ); } -/* - * LoongArch doesn't need any special per-pte or per-vma handling, except - * we need to flush cache for area to be unmapped. - */ -#define tlb_start_vma(tlb, vma) \ - do { \ - if (!(tlb)->fullmm) \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ - } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) static void tlb_flush(struct mmu_gather *tlb); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7aa12e88c5800df99123852f15c09e3054951e2e..c235648fae23afd14393a24859927b52012a7ee9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -256,6 +256,7 @@ config PPC select IRQ_FORCED_THREADING select MMU_GATHER_PAGE_SIZE select MMU_GATHER_RCU_TABLE_FREE + select MMU_GATHER_MERGE_VMAS select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64 diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 09a9ae5f3656173698fe941a0523cf7e195b392e..b3de6102a90779739a598d9784ab9b55ab6e1ee0 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -19,8 +19,6 @@ #include -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry #define tlb_flush tlb_flush diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 34cf8a598617b8cd0585d31a2068c1a97b8b8069..a4c46a03d2e26109eb4d74ab6c08d0f3d7bb9e16 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -73,6 +73,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) endif KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) +KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) # GCC versions that support the "-mstrict-align" option default to allowing # unaligned accesses. While unaligned accesses are explicitly allowed in the diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts index 039b92abf046c29c70eec876494ae0f8ffdfaebb..f72540bd14a3be3f05222d5f0bacc31b5f3f6f44 100644 --- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts +++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts @@ -35,7 +35,7 @@ led1 { gpio-keys { compatible = "gpio-keys"; - key0 { + key { label = "KEY0"; linux,code = ; gpios = <&gpio0 10 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts index b9e30df127fefedf8cf9203733fa09d83babbebb..8abdbe26a1d098583f99b4e87116b5dbc6acf45b 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts @@ -47,7 +47,7 @@ led2 { gpio-keys { compatible = "gpio-keys"; - boot { + key-boot { label = "BOOT"; linux,code = ; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts index 8d23401b0bbb6bcadbc293f45fc9247cbac5586b..3c6df1ecf76fd7f3534fdc557837ffad5e2ad1f1 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts @@ -52,7 +52,7 @@ led2 { gpio-keys { compatible = "gpio-keys"; - boot { + key-boot { label = "BOOT"; linux,code = ; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts index 24fd83b43d9d546e7f673333b61892f68d78457b..03c9843d503e6fc23cd3196c82aae6a96164a543 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts @@ -46,19 +46,19 @@ led2 { gpio-keys { compatible = "gpio-keys"; - up { + key-up { label = "UP"; linux,code = ; gpios = <&gpio1_0 7 GPIO_ACTIVE_LOW>; }; - press { + key-press { label = "PRESS"; linux,code = ; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; }; - down { + key-down { label = "DOWN"; linux,code = ; gpios = <&gpio0 1 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts index 25341f38292aabf66a2a0b2b1ec70dffc5e004f8..7164ad06317812d7ee53fc81871345b98e69a77b 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts @@ -23,7 +23,7 @@ chosen { gpio-keys { compatible = "gpio-keys"; - boot { + key-boot { label = "BOOT"; linux,code = ; gpios = <&gpio0 0 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index c71d6591d53988d1565b2688dd7311fbab5c3b99..33bb60a354cd20633fd40cd8b5faa01c407d3814 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -78,7 +78,7 @@ obj-$(CONFIG_SMP) += cpu_ops_sbi.o endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_KGDB) += kgdb.o -obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o +obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 9cb85095fd459fb5e9cea5d1d6e62e8c4b076128..0cb94992c15b32a852716ca877d95b5f14f63c6f 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -349,7 +349,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, { const char *strtab, *name, *shstrtab; const Elf_Shdr *sechdrs; - Elf_Rela *relas; + Elf64_Rela *relas; int i, r_type; /* String & section header string table */ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8cd9e56c629ba8c374ab9cd0c0117663809cbd27..5a1a8dfda6f8e5d0ee45acd83574187c2661cf8d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -204,6 +204,7 @@ config S390 select IOMMU_SUPPORT if PCI select MMU_GATHER_NO_GATHER select MMU_GATHER_RCU_TABLE_FREE + select MMU_GATHER_MERGE_VMAS select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PCI select NEED_SG_DMA_LENGTH if PCI diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index 2c6e1c6ecbe780284c2374d7ee0d4394b1a66fda..4120c428dc378ffdc0a656eb69535338af47dc9a 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -2,7 +2,7 @@ /* * Kernel interface for the s390 arch_random_* functions * - * Copyright IBM Corp. 2017, 2020 + * Copyright IBM Corp. 2017, 2022 * * Author: Harald Freudenberger * @@ -14,6 +14,7 @@ #ifdef CONFIG_ARCH_RANDOM #include +#include #include #include @@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v) static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { - if (static_branch_likely(&s390_arch_random_available)) { + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); atomic64_add(sizeof(*v), &s390_arch_random_counter); return true; @@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { - if (static_branch_likely(&s390_arch_random_available)) { + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); atomic64_add(sizeof(*v), &s390_arch_random_counter); return true; diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index fe6407f0eb1b7e4ef4a51e67c446ce20de96985e..3a5c8fb590e55ddfda72e6908ced1f4626e2c7c0 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -27,9 +27,6 @@ static inline void tlb_flush(struct mmu_gather *tlb); static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) - #define tlb_flush tlb_flush #define pte_free_tlb pte_free_tlb #define pmd_free_tlb pmd_free_tlb diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index ba449c47effd8d975abf2f1130acef46e46662f9..4f7d1dfbc6086fbd01620e14ae76340c70984c92 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -67,6 +67,8 @@ config SPARC64 select HAVE_KRETPROBES select HAVE_KPROBES select MMU_GATHER_RCU_TABLE_FREE if SMP + select MMU_GATHER_MERGE_VMAS + select MMU_GATHER_NO_FLUSH_CACHE select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h index 779a5a0f06080216cc0ce5ce30ca638b365b3b3d..3037187482db7ebe3334f99cb3e4e7af095933b7 100644 --- a/arch/sparc/include/asm/tlb_64.h +++ b/arch/sparc/include/asm/tlb_64.h @@ -22,8 +22,6 @@ void smp_flush_tlb_mm(struct mm_struct *mm); void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *); void flush_tlb_pending(void); -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) #define tlb_flush(tlb) flush_tlb_pending() /* diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e58798f636d474d277af5eb59dc9b6a42b2c31d0..52a7f91527fe03fb1930d6a7cb7ae76269cacc06 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -245,6 +245,7 @@ config X86 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_MERGE_VMAS select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC || STACK_VALIDATION @@ -2473,7 +2474,7 @@ config RETHUNK bool "Enable return-thunks" depends on RETPOLINE && CC_HAS_RETURN_THUNK select OBJTOOL if HAVE_OBJTOOL - default y + default y if X86_64 help Compile the kernel with the return-thunks compiler option to guard against kernel-to-user data leaks by avoiding return speculation. @@ -2482,21 +2483,21 @@ config RETHUNK config CPU_UNRET_ENTRY bool "Enable UNRET on kernel entry" - depends on CPU_SUP_AMD && RETHUNK + depends on CPU_SUP_AMD && RETHUNK && X86_64 default y help Compile the kernel with support for the retbleed=unret mitigation. config CPU_IBPB_ENTRY bool "Enable IBPB on kernel entry" - depends on CPU_SUP_AMD + depends on CPU_SUP_AMD && X86_64 default y help Compile the kernel with support for the retbleed=ibpb mitigation. config CPU_IBRS_ENTRY bool "Enable IBRS on kernel entry" - depends on CPU_SUP_INTEL + depends on CPU_SUP_INTEL && X86_64 default y help Compile the kernel with support for the spectre_v2=ibrs mitigation. diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1f40dad30d5084f50aaec155db49df961681cdd8..7854685c5f25b7926a6a722af83c134bd6ec6cd5 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -27,6 +27,7 @@ RETHUNK_CFLAGS := -mfunction-return=thunk-extern RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) endif +export RETHUNK_CFLAGS export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 13179f31fe10facf139f4c7c00fd8627363a5594..4f70fb6c2c1eb78e35f717ffb3085ee9f459e145 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -278,9 +278,9 @@ enum { }; /* - * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in - * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when - * TSX is not supported they have no consistent behavior: + * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x + * are the TSX flags when TSX is supported, but when TSX is not supported + * they have no consistent behavior: * * - For wrmsr(), bits 61:62 are considered part of the sign extension. * - For HW updates (branch captures) bits 61:62 are always OFF and are not @@ -288,7 +288,7 @@ enum { * * Therefore, if: * - * 1) LBR has TSX format + * 1) LBR format LBR_FORMAT_EIP_FLAGS2 * 2) CPU has no TSX support enabled * * ... then any value passed to wrmsr() must be sign extended to 63 bits and any @@ -300,7 +300,7 @@ static inline bool lbr_from_signext_quirk_needed(void) bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM); - return !tsx_support && x86_pmu.lbr_has_tsx; + return !tsx_support; } static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); @@ -1609,9 +1609,6 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_map = hsw_lbr_sel_map; x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); - - if (lbr_from_signext_quirk_needed()) - static_branch_enable(&lbr_from_quirk_key); } /* skylake */ @@ -1702,7 +1699,11 @@ void intel_pmu_lbr_init(void) switch (x86_pmu.intel_cap.lbr_format) { case LBR_FORMAT_EIP_FLAGS2: x86_pmu.lbr_has_tsx = 1; - fallthrough; + x86_pmu.lbr_from_flags = 1; + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); + break; + case LBR_FORMAT_EIP_FLAGS: x86_pmu.lbr_from_flags = 1; break; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 00f5227c8459870d142a4dce759a997467448b50..a77b915d36a8ed8885f4c33e11597a3950102747 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -302,6 +302,7 @@ #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 10a3bfc1eb230e7c1c7f49e29d7e7f453341b75c..38a3e86e665ef25310ac60bca516f58d84201680 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -297,6 +297,8 @@ do { \ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \ + X86_FEATURE_USE_IBPB_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 1bfe979bb9bcd25c1bc8d5c9ec82e581bb21bafc..580636cdc257b78930db0d9b9236cfa9fd62ed21 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -2,9 +2,6 @@ #ifndef _ASM_X86_TLB_H #define _ASM_X86_TLB_H -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) - #define tlb_flush tlb_flush static inline void tlb_flush(struct mmu_gather *tlb); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d6858533e6e595bfc2a0de630e503b0f3d6a460d..62f6b8b7c4a5285933b80013ff6fe2aa7bf6e183 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -555,7 +555,9 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) dest = addr + insn.length + insn.immediate.value; if (__static_call_fixup(addr, op, dest) || - WARN_ON_ONCE(dest != &__x86_return_thunk)) + WARN_ONCE(dest != &__x86_return_thunk, + "missing return thunk: %pS-%pS: %*ph", + addr, dest, 5, addr)) continue; DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index aa34f908c39ff0136dd9f0055167611c377eabd7..6454bc767f0fdfa3d69497b1fc54c624098ef091 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -975,6 +975,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; } #define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" #define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n" #ifdef CONFIG_BPF_SYSCALL void unpriv_ebpf_notify(int new_state) @@ -1415,6 +1416,8 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_IBRS: setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); + if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) + pr_warn(SPECTRE_V2_IBRS_PERF_MSG); break; case SPECTRE_V2_LFENCE: @@ -1516,7 +1519,16 @@ static void __init spectre_v2_select_mitigation(void) * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { + if (boot_cpu_has_bug(X86_BUG_RETBLEED) && + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) { + + if (retbleed_cmd != RETBLEED_CMD_IBPB) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW); + pr_info("Enabling Speculation Barrier for firmware calls\n"); + } + + } else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 143e37298d8a41f7c8616cb2b7d77188d96dbd1e..e5fa335a4ea794959f4a5aec13c747bc9e66ce0f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6029,6 +6029,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = 0; break; case KVM_CAP_X86_USER_SPACE_MSR: + r = -EINVAL; + if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL | + KVM_MSR_EXIT_REASON_UNKNOWN | + KVM_MSR_EXIT_REASON_FILTER)) + break; kvm->arch.user_space_msr_mask = cap->args[0]; r = 0; break; @@ -6183,6 +6188,9 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) return -EFAULT; + if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) empty &= !filter.ranges[i].nmsrs; diff --git a/certs/Kconfig b/certs/Kconfig index 476755703cf8b44c9c12425f7b347409baef7170..bf9b511573d75135182a3f15711d617c5e335e38 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -43,6 +43,7 @@ config SYSTEM_TRUSTED_KEYRING bool "Provide system-wide ring of trusted keys" depends on KEYS depends on ASYMMETRIC_KEY_TYPE + depends on X509_CERTIFICATE_PARSER help Provide a system keyring to which trusted keys can be added. Keys in the keyring are considered to be trusted. Keys may be added at will diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 6ff1901d7d4360f901f618bfeb90fcdae5367307..3c6d4ef87be0f178d0d9f0e99abaac7cb4ecd9ec 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -782,7 +782,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) if (!osc_cpc_flexible_adr_space_confirmed) { pr_debug("Flexible address space capability not supported\n"); - goto out_free; + if (!cpc_supported_by_cpu()) + goto out_free; } addr = ioremap(gas_t->address, gas_t->bit_width/8); @@ -809,7 +810,8 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) } if (!osc_cpc_flexible_adr_space_confirmed) { pr_debug("Flexible address space capability not supported\n"); - goto out_free; + if (!cpc_supported_by_cpu()) + goto out_free; } } else { if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) { diff --git a/drivers/clk/clk-lan966x.c b/drivers/clk/clk-lan966x.c index d1535ac13e8942d10dd8897d4c2690d2fed2cb71..81cb90955d68b9d572b39b1fe89bd435c8517bb6 100644 --- a/drivers/clk/clk-lan966x.c +++ b/drivers/clk/clk-lan966x.c @@ -213,7 +213,7 @@ static int lan966x_gate_clk_register(struct device *dev, hw_data->hws[i] = devm_clk_hw_register_gate(dev, clk_gate_desc[idx].name, - "lan966x", 0, base, + "lan966x", 0, gate_base, clk_gate_desc[idx].bit_idx, 0, &clk_gate_lock); diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 08bc52c3cdcbedb19c170488caa101daf9f9f932..ecd7d169470b068ddbbc9685ae33a1f0a860cd32 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -351,6 +351,9 @@ static const struct regmap_config pca953x_i2c_regmap = { .reg_bits = 8, .val_bits = 8, + .use_single_read = true, + .use_single_write = true, + .readable_reg = pca953x_readable_register, .writeable_reg = pca953x_writeable_register, .volatile_reg = pca953x_volatile_register, @@ -906,15 +909,18 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert) { DECLARE_BITMAP(val, MAX_LINE); + u8 regaddr; int ret; - ret = regcache_sync_region(chip->regmap, chip->regs->output, - chip->regs->output + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip) - 1); if (ret) goto out; - ret = regcache_sync_region(chip->regmap, chip->regs->direction, - chip->regs->direction + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip) - 1); if (ret) goto out; @@ -1127,14 +1133,14 @@ static int pca953x_regcache_sync(struct device *dev) * sync these registers first and only then sync the rest. */ regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0); - ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip)); + ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync GPIO dir registers: %d\n", ret); return ret; } regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0); - ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip)); + ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync GPIO out registers: %d\n", ret); return ret; @@ -1144,7 +1150,7 @@ static int pca953x_regcache_sync(struct device *dev) if (chip->driver_data & PCA_PCAL) { regaddr = pca953x_recalc_addr(chip, PCAL953X_IN_LATCH, 0); ret = regcache_sync_region(chip->regmap, regaddr, - regaddr + NBANK(chip)); + regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync INT latch registers: %d\n", ret); @@ -1153,7 +1159,7 @@ static int pca953x_regcache_sync(struct device *dev) regaddr = pca953x_recalc_addr(chip, PCAL953X_INT_MASK, 0); ret = regcache_sync_region(chip->regmap, regaddr, - regaddr + NBANK(chip)); + regaddr + NBANK(chip) - 1); if (ret) { dev_err(dev, "Failed to sync INT mask registers: %d\n", ret); diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index b6d3a57e27edc06b3ded115df1e676b7b5566975..7f8e2fed29884dca7330b9bc3961bbf8f0b40340 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -99,7 +99,7 @@ static inline void xgpio_set_value32(unsigned long *map, int bit, u32 v) const unsigned long offset = (bit % BITS_PER_LONG) & BIT(5); map[index] &= ~(0xFFFFFFFFul << offset); - map[index] |= v << offset; + map[index] |= (unsigned long)v << offset; } static inline int xgpio_regoffset(struct xgpio_instance *chip, int ch) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6b6d46e29e6e8bd21dd5a3ee3789a7858c4757ce..4608599ba6bb54d6a3d64c93007ee6b9f53f1e1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1364,16 +1364,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdkfd_process_info *process_info = vm->process_info; - struct amdgpu_bo *pd = vm->root.bo; if (!process_info) return; - /* Release eviction fence from PD */ - amdgpu_bo_reserve(pd, false); - amdgpu_bo_fence(pd, NULL, false); - amdgpu_bo_unreserve(pd); - /* Update process info */ mutex_lock(&process_info->lock); process_info->n_vms--; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 714178f1b6c6edb83464fbd2fcfbfc5d16ec3eab..2168163aad2d386014040bc666e528aaeee89ab1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list, rhead); - + mutex_destroy(&list->bo_list_mutex); kvfree(list); } @@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, trace_amdgpu_cs_bo_status(list->num_entries, total_size); + mutex_init(&list->bo_list_mutex); *result = list; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 529d52a204cf4a5bc8a92d261b02d92ae2982630..9caea1688fc322db41d4a05538f043265cac9cf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -47,6 +47,10 @@ struct amdgpu_bo_list { struct amdgpu_bo *oa_obj; unsigned first_userptr; unsigned num_entries; + + /* Protect access during command submission. + */ + struct mutex bo_list_mutex; }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b28af04b0c3e93be60aacd802d6b24843c29fa4a..d8f1335bc68f416154b6ee3aae6f5a4028f08cbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + mutex_lock(&p->bo_list->bo_list_mutex); + /* One for TTM and one for the CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) e->tv.num_shared = 2; @@ -651,6 +653,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, kvfree(e->user_pages); e->user_pages = NULL; } + mutex_unlock(&p->bo_list->bo_list_mutex); } return r; } @@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + mutex_unlock(&parser->bo_list->bo_list_mutex); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } } r = amdgpu_vm_handle_moved(adev, vm); @@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); + mutex_unlock(&p->bo_list->bo_list_mutex); return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 93ac33a8de9aab9ae82be13205363f0c97fcee92..3087dd1a1856cd00fb9c35577ae491395414e346 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1653,7 +1653,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work(); #endif - if (dc_enable_dmub_notifications(adev->dm.dc)) { + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { init_completion(&adev->dm.dmub_aux_transfer_done); adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL); if (!adev->dm.dmub_notify) { @@ -1689,6 +1689,13 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) goto error; } + /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. + * It is expected that DMUB will resend any pending notifications at this point, for + * example HPD from DPIA. + */ + if (dc_is_dmub_outbox_supported(adev->dm.dc)) + dc_enable_dmub_outbox(adev->dm.dc); + /* create fake encoders for MST */ dm_dp_create_fake_mst_encoders(adev); @@ -2678,9 +2685,6 @@ static int dm_resume(void *handle) */ link_enc_cfg_copy(adev->dm.dc->current_state, dc_state); - if (dc_enable_dmub_notifications(adev->dm.dc)) - amdgpu_dm_outbox_init(adev); - r = dm_dmub_hw_init(adev); if (r) DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); @@ -2698,6 +2702,11 @@ static int dm_resume(void *handle) } } + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + amdgpu_dm_outbox_init(adev); + dc_enable_dmub_outbox(adev->dm.dc); + } + WARN_ON(!dc_commit_state(dm->dc, dc_state)); dm_gpureset_commit_state(dm->cached_dc_state, dm); @@ -2719,13 +2728,15 @@ static int dm_resume(void *handle) /* TODO: Remove dc_state->dccg, use dc->dccg directly. */ dc_resource_state_construct(dm->dc, dm_state->context); - /* Re-enable outbox interrupts for DPIA. */ - if (dc_enable_dmub_notifications(adev->dm.dc)) - amdgpu_dm_outbox_init(adev); - /* Before powering on DC we need to re-initialize DMUB. */ dm_dmub_hw_resume(adev); + /* Re-enable outbox interrupts for DPIA. */ + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + amdgpu_dm_outbox_init(adev); + dc_enable_dmub_outbox(adev->dm.dc); + } + /* power on hardware */ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c index d5962a34c01d5ff1a82b9044aa2382c3ef07e6fc..e5fc875990c4f3308e0134f9dc0455e84d1f2ec0 100644 --- a/drivers/gpu/drm/drm_gem_ttm_helper.c +++ b/drivers/gpu/drm/drm_gem_ttm_helper.c @@ -64,8 +64,13 @@ int drm_gem_ttm_vmap(struct drm_gem_object *gem, struct iosys_map *map) { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + int ret; + + dma_resv_lock(gem->resv, NULL); + ret = ttm_bo_vmap(bo, map); + dma_resv_unlock(gem->resv); - return ttm_bo_vmap(bo, map); + return ret; } EXPORT_SYMBOL(drm_gem_ttm_vmap); @@ -82,7 +87,9 @@ void drm_gem_ttm_vunmap(struct drm_gem_object *gem, { struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + dma_resv_lock(gem->resv, NULL); ttm_bo_vunmap(bo, map); + dma_resv_unlock(gem->resv); } EXPORT_SYMBOL(drm_gem_ttm_vunmap); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 09f82545789f194b305cbadd42305d82d66b22d4..44e7339e7a4aea6fd4d33e3119d147dbe6ae6bcc 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -273,10 +273,17 @@ struct intel_context { u8 child_index; /** @guc: GuC specific members for parallel submission */ struct { - /** @wqi_head: head pointer in work queue */ + /** @wqi_head: cached head pointer in work queue */ u16 wqi_head; - /** @wqi_tail: tail pointer in work queue */ + /** @wqi_tail: cached tail pointer in work queue */ u16 wqi_tail; + /** @wq_head: pointer to the actual head in work queue */ + u32 *wq_head; + /** @wq_tail: pointer to the actual head in work queue */ + u32 *wq_tail; + /** @wq_status: pointer to the status in work queue */ + u32 *wq_status; + /** * @parent_page: page in context state (ce->state) used * by parent for work queue, process descriptor diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 86f7a9ac1c394b4a4a9d90bcb1e2c4fb1068119b..2b0266cab66b93a579a65b5f36a1268a4e44d1b7 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -661,6 +661,16 @@ static inline void execlists_schedule_out(struct i915_request *rq) i915_request_put(rq); } +static u32 map_i915_prio_to_lrc_desc_prio(int prio) +{ + if (prio > I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_HIGH; + else if (prio < I915_PRIORITY_NORMAL) + return GEN12_CTX_PRIORITY_LOW; + else + return GEN12_CTX_PRIORITY_NORMAL; +} + static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; @@ -669,7 +679,7 @@ static u64 execlists_update_context(struct i915_request *rq) desc = ce->lrc.desc; if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY) - desc |= lrc_desc_priority(rq_prio(rq)); + desc |= map_i915_prio_to_lrc_desc_prio(rq_prio(rq)); /* * WaIdleLiteRestore:bdw,skl diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 31be734010db3bc5cd7234caff95a42268e035a3..a390f0813c8b64e5b0ee08a48a9ca7e875ad9e94 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -111,16 +111,6 @@ enum { #define XEHP_SW_COUNTER_SHIFT 58 #define XEHP_SW_COUNTER_WIDTH 6 -static inline u32 lrc_desc_priority(int prio) -{ - if (prio > I915_PRIORITY_NORMAL) - return GEN12_CTX_PRIORITY_HIGH; - else if (prio < I915_PRIORITY_NORMAL) - return GEN12_CTX_PRIORITY_LOW; - else - return GEN12_CTX_PRIORITY_NORMAL; -} - static inline void lrc_runtime_start(struct intel_context *ce) { struct intel_context_stats *stats = &ce->stats; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 4ef9990ed7f8be3040b3fc92d56cd3fb2f00bae4..29ef8afc8c2e45699f88fd064a40051c329e5ce2 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -122,6 +122,9 @@ enum intel_guc_action { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, + INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY = 0x1005, + INTEL_GUC_ACTION_V69_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, + INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 966e69a8b1c124d3b9ab107b9f65f774589c2b10..9feda105f9131f7e6f1398111a1de7d17583c1ee 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -170,6 +170,11 @@ struct intel_guc { /** @ads_engine_usage_size: size of engine usage in the ADS */ u32 ads_engine_usage_size; + /** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */ + struct i915_vma *lrc_desc_pool_v69; + /** @lrc_desc_pool_vaddr_v69: contents of the GuC LRC descriptor pool */ + void *lrc_desc_pool_vaddr_v69; + /** * @context_lookup: used to resolve intel_context from guc_id, if a * context is present in this structure it is registered with the GuC diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 42cb7a9a6199c79f8350efd259bbeb0fd46b57c3..89a7e5ec0614eaf2f32cabb8ae76bdad1ea5e3ff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -203,6 +203,20 @@ struct guc_wq_item { u32 fence_id; } __packed; +struct guc_process_desc_v69 { + u32 stage_id; + u64 db_base_addr; + u32 head; + u32 tail; + u32 error_offset; + u64 wq_base_addr; + u32 wq_size_bytes; + u32 wq_status; + u32 engine_presence; + u32 priority; + u32 reserved[36]; +} __packed; + struct guc_sched_wq_desc { u32 head; u32 tail; @@ -227,6 +241,37 @@ struct guc_ctxt_registration_info { }; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) +/* Preempt to idle on quantum expiry */ +#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69 BIT(0) + +/* + * GuC Context registration descriptor. + * FIXME: This is only required to exist during context registration. + * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC + * is not required. + */ +struct guc_lrc_desc_v69 { + u32 hw_context_desc; + u32 slpm_perf_mode_hint; /* SPLC v1 only */ + u32 slpm_freq_hint; + u32 engine_submit_mask; /* In logical space */ + u8 engine_class; + u8 reserved0[3]; + u32 priority; + u32 process_desc; + u32 wq_addr; + u32 wq_size; + u32 context_flags; /* CONTEXT_REGISTRATION_* */ + /* Time for one workload to execute. (in micro seconds) */ + u32 execution_quantum; + /* Time to wait for a preemption request to complete before issuing a + * reset. (in micro seconds). + */ + u32 preemption_timeout; + u32 policy_flags; /* CONTEXT_POLICY_* */ + u32 reserved1[19]; +} __packed; + /* 32-bit KLV structure as used by policy updates and others */ struct guc_klv_generic_dw_t { u32 kl; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1726f0f19901097551de288ec80bdf37a4744a52..9ffb343d0f7973cea811fec8564c0014170a66d5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -414,12 +414,15 @@ struct sync_semaphore { }; struct parent_scratch { - struct guc_sched_wq_desc wq_desc; + union guc_descs { + struct guc_sched_wq_desc wq_desc; + struct guc_process_desc_v69 pdesc; + } descs; struct sync_semaphore go; struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; - u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - + u8 unused[WQ_OFFSET - sizeof(union guc_descs) - sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; u32 wq[WQ_SIZE / sizeof(u32)]; @@ -456,17 +459,23 @@ __get_parent_scratch(struct intel_context *ce) LRC_STATE_OFFSET) / sizeof(u32))); } +static struct guc_process_desc_v69 * +__get_process_desc_v69(struct intel_context *ce) +{ + struct parent_scratch *ps = __get_parent_scratch(ce); + + return &ps->descs.pdesc; +} + static struct guc_sched_wq_desc * -__get_wq_desc(struct intel_context *ce) +__get_wq_desc_v70(struct intel_context *ce) { struct parent_scratch *ps = __get_parent_scratch(ce); - return &ps->wq_desc; + return &ps->descs.wq_desc; } -static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, - struct intel_context *ce, - u32 wqi_size) +static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size) { /* * Check for space in work queue. Caching a value of head pointer in @@ -476,7 +485,7 @@ static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, #define AVAILABLE_SPACE \ CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) if (wqi_size > AVAILABLE_SPACE) { - ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head); + ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head); if (wqi_size > AVAILABLE_SPACE) return NULL; @@ -495,11 +504,55 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) return ce; } +static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index) +{ + struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69; + + if (!base) + return NULL; + + GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); + + return &base[index]; +} + +static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc) +{ + u32 size; + int ret; + + size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) * + GUC_MAX_CONTEXT_ID); + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69, + (void **)&guc->lrc_desc_pool_vaddr_v69); + if (ret) + return ret; + + return 0; +} + +static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc) +{ + if (!guc->lrc_desc_pool_vaddr_v69) + return; + + guc->lrc_desc_pool_vaddr_v69 = NULL; + i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP); +} + static inline bool guc_submission_initialized(struct intel_guc *guc) { return guc->submission_initialized; } +static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id) +{ + struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id); + + if (desc) + memset(desc, 0, sizeof(*desc)); +} + static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) { return __get_context(guc, id); @@ -526,6 +579,8 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) if (unlikely(!guc_submission_initialized(guc))) return; + _reset_lrc_desc_v69(guc, id); + /* * xarray API doesn't have xa_erase_irqsave wrapper, so calling * the lower level functions directly. @@ -611,7 +666,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) true, timeout); } -static int guc_context_policy_init(struct intel_context *ce, bool loop); +static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); static int try_context_registration(struct intel_context *ce, bool loop); static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) @@ -639,7 +694,7 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) GEM_BUG_ON(context_guc_id_invalid(ce)); if (context_policy_required(ce)) { - err = guc_context_policy_init(ce, false); + err = guc_context_policy_init_v70(ce, false); if (err) return err; } @@ -737,9 +792,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce) return (WQ_SIZE - ce->parallel.guc.wqi_tail); } -static void write_wqi(struct guc_sched_wq_desc *wq_desc, - struct intel_context *ce, - u32 wqi_size) +static void write_wqi(struct intel_context *ce, u32 wqi_size) { BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); @@ -750,13 +803,12 @@ static void write_wqi(struct guc_sched_wq_desc *wq_desc, ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & (WQ_SIZE - 1); - WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail); + WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); } static int guc_wq_noop_append(struct intel_context *ce) { - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); - u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce)); + u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; if (!wqi) @@ -775,7 +827,6 @@ static int __guc_wq_item_append(struct i915_request *rq) { struct intel_context *ce = request_to_scheduling_context(rq); struct intel_context *child; - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); unsigned int wqi_size = (ce->parallel.number_children + 4) * sizeof(u32); u32 *wqi; @@ -795,7 +846,7 @@ static int __guc_wq_item_append(struct i915_request *rq) return ret; } - wqi = get_wq_pointer(wq_desc, ce, wqi_size); + wqi = get_wq_pointer(ce, wqi_size); if (!wqi) return -EBUSY; @@ -810,7 +861,7 @@ static int __guc_wq_item_append(struct i915_request *rq) for_each_child(ce, child) *wqi++ = child->ring->tail / sizeof(u64); - write_wqi(wq_desc, ce, wqi_size); + write_wqi(ce, wqi_size); return 0; } @@ -1868,20 +1919,34 @@ static void reset_fail_worker_func(struct work_struct *w); int intel_guc_submission_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); + int ret; if (guc->submission_initialized) return 0; + if (guc->fw.major_ver_found < 70) { + ret = guc_lrc_desc_pool_create_v69(guc); + if (ret) + return ret; + } + guc->submission_state.guc_ids_bitmap = bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); - if (!guc->submission_state.guc_ids_bitmap) - return -ENOMEM; + if (!guc->submission_state.guc_ids_bitmap) { + ret = -ENOMEM; + goto destroy_pool; + } guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; guc->timestamp.shift = gpm_timestamp_shift(gt); guc->submission_initialized = true; return 0; + +destroy_pool: + guc_lrc_desc_pool_destroy_v69(guc); + + return ret; } void intel_guc_submission_fini(struct intel_guc *guc) @@ -1890,6 +1955,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) return; guc_flush_destroyed_contexts(guc); + guc_lrc_desc_pool_destroy_v69(guc); i915_sched_engine_put(guc->sched_engine); bitmap_free(guc->submission_state.guc_ids_bitmap); guc->submission_initialized = false; @@ -2147,10 +2213,34 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) spin_unlock_irqrestore(&guc->submission_state.lock, flags); } -static int __guc_action_register_multi_lrc(struct intel_guc *guc, - struct intel_context *ce, - struct guc_ctxt_registration_info *info, - bool loop) +static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, + struct intel_context *ce, + u32 guc_id, + u32 offset, + bool loop) +{ + struct intel_context *child; + u32 action[4 + MAX_ENGINE_INSTANCE]; + int len = 0; + + GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); + + action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; + action[len++] = guc_id; + action[len++] = ce->parallel.number_children + 1; + action[len++] = offset; + for_each_child(ce, child) { + offset += sizeof(struct guc_lrc_desc_v69); + action[len++] = offset; + } + + return guc_submission_send_busy_loop(guc, action, len, 0, loop); +} + +static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, + struct intel_context *ce, + struct guc_ctxt_registration_info *info, + bool loop) { struct intel_context *child; u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; @@ -2190,9 +2280,24 @@ static int __guc_action_register_multi_lrc(struct intel_guc *guc, return guc_submission_send_busy_loop(guc, action, len, 0, loop); } -static int __guc_action_register_context(struct intel_guc *guc, - struct guc_ctxt_registration_info *info, - bool loop) +static int __guc_action_register_context_v69(struct intel_guc *guc, + u32 guc_id, + u32 offset, + bool loop) +{ + u32 action[] = { + INTEL_GUC_ACTION_REGISTER_CONTEXT, + guc_id, + offset, + }; + + return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + 0, loop); +} + +static int __guc_action_register_context_v70(struct intel_guc *guc, + struct guc_ctxt_registration_info *info, + bool loop) { u32 action[] = { INTEL_GUC_ACTION_REGISTER_CONTEXT, @@ -2213,24 +2318,52 @@ static int __guc_action_register_context(struct intel_guc *guc, 0, loop); } -static void prepare_context_registration_info(struct intel_context *ce, - struct guc_ctxt_registration_info *info); +static void prepare_context_registration_info_v69(struct intel_context *ce); +static void prepare_context_registration_info_v70(struct intel_context *ce, + struct guc_ctxt_registration_info *info); -static int register_context(struct intel_context *ce, bool loop) +static int +register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) +{ + u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + + ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); + + prepare_context_registration_info_v69(ce); + + if (intel_context_is_parent(ce)) + return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, + offset, loop); + else + return __guc_action_register_context_v69(guc, ce->guc_id.id, + offset, loop); +} + +static int +register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) { struct guc_ctxt_registration_info info; + + prepare_context_registration_info_v70(ce, &info); + + if (intel_context_is_parent(ce)) + return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); + else + return __guc_action_register_context_v70(guc, &info, loop); +} + +static int register_context(struct intel_context *ce, bool loop) +{ struct intel_guc *guc = ce_to_guc(ce); int ret; GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - prepare_context_registration_info(ce, &info); - - if (intel_context_is_parent(ce)) - ret = __guc_action_register_multi_lrc(guc, ce, &info, loop); + if (guc->fw.major_ver_found >= 70) + ret = register_context_v70(guc, ce, loop); else - ret = __guc_action_register_context(guc, &info, loop); + ret = register_context_v69(guc, ce, loop); + if (likely(!ret)) { unsigned long flags; @@ -2238,7 +2371,8 @@ static int register_context(struct intel_context *ce, bool loop) set_context_registered(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - guc_context_policy_init(ce, loop); + if (guc->fw.major_ver_found >= 70) + guc_context_policy_init_v70(ce, loop); } return ret; @@ -2335,7 +2469,7 @@ static int __guc_context_set_context_policies(struct intel_guc *guc, 0, loop); } -static int guc_context_policy_init(struct intel_context *ce, bool loop) +static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; struct intel_guc *guc = &engine->gt->uc.guc; @@ -2394,8 +2528,108 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop) return ret; } -static void prepare_context_registration_info(struct intel_context *ce, - struct guc_ctxt_registration_info *info) +static void guc_context_policy_init_v69(struct intel_engine_cs *engine, + struct guc_lrc_desc_v69 *desc) +{ + desc->policy_flags = 0; + + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) + desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; + + /* NB: For both of these, zero means disabled. */ + desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; + desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; +} + +static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) +{ + /* + * this matches the mapping we do in map_i915_prio_to_guc_prio() + * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL) + */ + switch (prio) { + default: + MISSING_CASE(prio); + fallthrough; + case GUC_CLIENT_PRIORITY_KMD_NORMAL: + return GEN12_CTX_PRIORITY_NORMAL; + case GUC_CLIENT_PRIORITY_NORMAL: + return GEN12_CTX_PRIORITY_LOW; + case GUC_CLIENT_PRIORITY_HIGH: + case GUC_CLIENT_PRIORITY_KMD_HIGH: + return GEN12_CTX_PRIORITY_HIGH; + } +} + +static void prepare_context_registration_info_v69(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_guc *guc = &engine->gt->uc.guc; + u32 ctx_id = ce->guc_id.id; + struct guc_lrc_desc_v69 *desc; + struct intel_context *child; + + GEM_BUG_ON(!engine->mask); + + /* + * Ensure LRC + CT vmas are is same region as write barrier is done + * based on CT vma region. + */ + GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != + i915_gem_object_is_lmem(ce->ring->vma->obj)); + + desc = __get_lrc_desc_v69(guc, ctx_id); + desc->engine_class = engine_class_to_guc_class(engine->class); + desc->engine_submit_mask = engine->logical_mask; + desc->hw_context_desc = ce->lrc.lrca; + desc->priority = ce->guc_state.prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init_v69(engine, desc); + + /* + * If context is a parent, we need to register a process descriptor + * describing a work queue and register all child contexts. + */ + if (intel_context_is_parent(ce)) { + struct guc_process_desc_v69 *pdesc; + + ce->parallel.guc.wqi_tail = 0; + ce->parallel.guc.wqi_head = 0; + + desc->process_desc = i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce); + desc->wq_addr = i915_ggtt_offset(ce->state) + + __get_wq_offset(ce); + desc->wq_size = WQ_SIZE; + + pdesc = __get_process_desc_v69(ce); + memset(pdesc, 0, sizeof(*(pdesc))); + pdesc->stage_id = ce->guc_id.id; + pdesc->wq_base_addr = desc->wq_addr; + pdesc->wq_size_bytes = desc->wq_size; + pdesc->wq_status = WQ_STATUS_ACTIVE; + + ce->parallel.guc.wq_head = &pdesc->head; + ce->parallel.guc.wq_tail = &pdesc->tail; + ce->parallel.guc.wq_status = &pdesc->wq_status; + + for_each_child(ce, child) { + desc = __get_lrc_desc_v69(guc, child->guc_id.id); + + desc->engine_class = + engine_class_to_guc_class(engine->class); + desc->hw_context_desc = child->lrc.lrca; + desc->priority = ce->guc_state.prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init_v69(engine, desc); + } + + clear_children_join_go_memory(ce); + } +} + +static void prepare_context_registration_info_v70(struct intel_context *ce, + struct guc_ctxt_registration_info *info) { struct intel_engine_cs *engine = ce->engine; struct intel_guc *guc = &engine->gt->uc.guc; @@ -2420,6 +2654,8 @@ static void prepare_context_registration_info(struct intel_context *ce, */ info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); + if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY) + info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio); info->flags = CONTEXT_REGISTRATION_FLAG_KMD; /* @@ -2443,10 +2679,14 @@ static void prepare_context_registration_info(struct intel_context *ce, info->wq_base_hi = upper_32_bits(wq_base_offset); info->wq_size = WQ_SIZE; - wq_desc = __get_wq_desc(ce); + wq_desc = __get_wq_desc_v70(ce); memset(wq_desc, 0, sizeof(*wq_desc)); wq_desc->wq_status = WQ_STATUS_ACTIVE; + ce->parallel.guc.wq_head = &wq_desc->head; + ce->parallel.guc.wq_tail = &wq_desc->tail; + ce->parallel.guc.wq_status = &wq_desc->wq_status; + clear_children_join_go_memory(ce); } } @@ -2761,11 +3001,21 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) { - struct context_policy policy; + if (guc->fw.major_ver_found >= 70) { + struct context_policy policy; - __guc_context_policy_start_klv(&policy, guc_id); - __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); - __guc_context_set_context_policies(guc, &policy, true); + __guc_context_policy_start_klv(&policy, guc_id); + __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); + __guc_context_set_context_policies(guc, &policy, true); + } else { + u32 action[] = { + INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, + guc_id, + preemption_timeout + }; + + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + } } static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) @@ -3013,11 +3263,21 @@ static int guc_context_alloc(struct intel_context *ce) static void __guc_context_set_prio(struct intel_guc *guc, struct intel_context *ce) { - struct context_policy policy; + if (guc->fw.major_ver_found >= 70) { + struct context_policy policy; - __guc_context_policy_start_klv(&policy, ce->guc_id.id); - __guc_context_policy_add_priority(&policy, ce->guc_state.prio); - __guc_context_set_context_policies(guc, &policy, true); + __guc_context_policy_start_klv(&policy, ce->guc_id.id); + __guc_context_policy_add_priority(&policy, ce->guc_state.prio); + __guc_context_set_context_policies(guc, &policy, true); + } else { + u32 action[] = { + INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, + ce->guc_id.id, + ce->guc_state.prio, + }; + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + } } static void guc_context_set_prio(struct intel_guc *guc, @@ -4527,17 +4787,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, guc_log_context_priority(p, ce); if (intel_context_is_parent(ce)) { - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); struct intel_context *child; drm_printf(p, "\t\tNumber children: %u\n", ce->parallel.number_children); - drm_printf(p, "\t\tWQI Head: %u\n", - READ_ONCE(wq_desc->head)); - drm_printf(p, "\t\tWQI Tail: %u\n", - READ_ONCE(wq_desc->tail)); - drm_printf(p, "\t\tWQI Status: %u\n\n", - READ_ONCE(wq_desc->wq_status)); + + if (ce->parallel.guc.wq_status) { + drm_printf(p, "\t\tWQI Head: %u\n", + READ_ONCE(*ce->parallel.guc.wq_head)); + drm_printf(p, "\t\tWQI Tail: %u\n", + READ_ONCE(*ce->parallel.guc.wq_tail)); + drm_printf(p, "\t\tWQI Status: %u\n\n", + READ_ONCE(*ce->parallel.guc.wq_status)); + } if (ce->engine->emit_bb_start == emit_bb_start_parent_no_preempt_mid_batch) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 2ff55b9994bc4fed4fb0eabc9ac1c053e93d1372..703f42ba5ddd6af6c6167fe887e0751ea6347d06 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -70,6 +70,10 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \ fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1)) +#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \ + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) + #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \ @@ -105,6 +109,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, MODULE_FIRMWARE(uc_); INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) +INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH) /* The below structs and macros are used to iterate across the list of blobs */ @@ -149,6 +154,9 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) static const struct uc_fw_platform_requirement blobs_guc[] = { INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) }; + static const struct uc_fw_platform_requirement blobs_guc_fallback[] = { + INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB) + }; static const struct uc_fw_platform_requirement blobs_huc[] = { INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) }; @@ -179,12 +187,29 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { const struct uc_fw_blob *blob = &fw_blobs[i].blob; uc_fw->path = blob->path; + uc_fw->wanted_path = blob->path; uc_fw->major_ver_wanted = blob->major; uc_fw->minor_ver_wanted = blob->minor; break; } } + if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) { + const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback; + u32 count = ARRAY_SIZE(blobs_guc_fallback); + + for (i = 0; i < count && p <= blobs[i].p; i++) { + if (p == blobs[i].p && rev >= blobs[i].rev) { + const struct uc_fw_blob *blob = &blobs[i].blob; + + uc_fw->fallback.path = blob->path; + uc_fw->fallback.major_ver = blob->major; + uc_fw->fallback.minor_ver = blob->minor; + break; + } + } + } + /* make sure the list is ordered as expected */ if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) { for (i = 1; i < fw_count; i++) { @@ -338,7 +363,24 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); - err = request_firmware(&fw, uc_fw->path, dev); + err = firmware_request_nowarn(&fw, uc_fw->path, dev); + if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) { + err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev); + if (!err) { + drm_notice(&i915->drm, + "%s firmware %s is recommended, but only %s was found\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->wanted_path, + uc_fw->fallback.path); + drm_info(&i915->drm, + "Consider updating your linux-firmware pkg or downloading from %s\n", + INTEL_UC_FIRMWARE_URL); + + uc_fw->path = uc_fw->fallback.path; + uc_fw->major_ver_wanted = uc_fw->fallback.major_ver; + uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver; + } + } if (err) goto fail; @@ -437,8 +479,8 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) INTEL_UC_FIRMWARE_MISSING : INTEL_UC_FIRMWARE_ERROR); - drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n", intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); @@ -796,7 +838,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) { drm_printf(p, "%s firmware: %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path); + if (uc_fw->fallback.path) { + drm_printf(p, "%s firmware fallback: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path); + drm_printf(p, "fallback selected: %s\n", + str_yes_no(uc_fw->path == uc_fw->fallback.path)); + } drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(uc_fw->status)); drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 3229018877d3dbc63fa52e395ca647a606dd2a78..562acdf88adb8943f0689606cfad505eb66fb2f0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -74,6 +74,7 @@ struct intel_uc_fw { const enum intel_uc_fw_status status; enum intel_uc_fw_status __status; /* no accidental overwrites */ }; + const char *wanted_path; const char *path; bool user_overridden; size_t size; @@ -98,6 +99,12 @@ struct intel_uc_fw { u16 major_ver_found; u16 minor_ver_found; + struct { + const char *path; + u16 major_ver; + u16 minor_ver; + } fallback; + u32 rsa_size; u32 ucode_size; diff --git a/drivers/gpu/drm/imx/dcss/dcss-dev.c b/drivers/gpu/drm/imx/dcss/dcss-dev.c index c849533ca83e31942de0e5ce2fe738e237108a42..3f5750cc2673e7f99845e442708a1a7dce94d36a 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-dev.c +++ b/drivers/gpu/drm/imx/dcss/dcss-dev.c @@ -207,6 +207,7 @@ struct dcss_dev *dcss_dev_create(struct device *dev, bool hdmi_output) ret = dcss_submodules_init(dcss); if (ret) { + of_node_put(dcss->of_port); dev_err(dev, "submodules initialization failed\n"); goto clks_err; } @@ -237,6 +238,8 @@ void dcss_dev_destroy(struct dcss_dev *dcss) dcss_clocks_disable(dcss); } + of_node_put(dcss->of_port); + pm_runtime_disable(dcss->dev); dcss_submodules_stop(dcss); diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index c96014464355c3e26a18039a6ed88df6d9274eea..a189982601a48ac753b7ebabaa74cd6b180c7dfb 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -713,7 +713,7 @@ static int generic_edp_panel_probe(struct device *dev, struct panel_edp *panel) of_property_read_u32(dev->of_node, "hpd-reliable-delay-ms", &reliable_ms); desc->delay.hpd_reliable = reliable_ms; of_property_read_u32(dev->of_node, "hpd-absent-delay-ms", &absent_ms); - desc->delay.hpd_reliable = absent_ms; + desc->delay.hpd_absent = absent_ms; /* Power the panel on so we can read the EDID */ ret = pm_runtime_get_sync(dev); diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 191c56064f196bfd2af5f929e2767dc561ee495b..6b25b2f4f5a308185f5ca1777d47671e7f8dc864 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -190,7 +190,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) } EXPORT_SYMBOL(drm_sched_entity_flush); -static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk) +static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) { struct drm_sched_job *job = container_of(wrk, typeof(*job), work); @@ -207,8 +207,8 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, struct drm_sched_job *job = container_of(cb, struct drm_sched_job, finish_cb); - init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work); - irq_work_queue(&job->work); + INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work); + schedule_work(&job->work); } static struct dma_fence * diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 3d6f8ee355bfce0413a73bcb1e89175c9fce3a10..630cfa4ddd468c5b3ee4d4bda56b792aba2806e6 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -388,9 +388,9 @@ static irqreturn_t cdns_i2c_slave_isr(void *ptr) */ static irqreturn_t cdns_i2c_master_isr(void *ptr) { - unsigned int isr_status, avail_bytes, updatetx; + unsigned int isr_status, avail_bytes; unsigned int bytes_to_send; - bool hold_quirk; + bool updatetx; struct cdns_i2c *id = ptr; /* Signal completion only after everything is updated */ int done_flag = 0; @@ -410,11 +410,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) * Check if transfer size register needs to be updated again for a * large data receive operation. */ - updatetx = 0; - if (id->recv_count > id->curr_recv_count) - updatetx = 1; - - hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx; + updatetx = id->recv_count > id->curr_recv_count; /* When receiving, handle data interrupt and completion interrupt */ if (id->p_recv_buf && @@ -445,7 +441,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) break; } - if (cdns_is_holdquirk(id, hold_quirk)) + if (cdns_is_holdquirk(id, updatetx)) break; } @@ -456,7 +452,7 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) * maintain transfer size non-zero while performing a large * receive operation. */ - if (cdns_is_holdquirk(id, hold_quirk)) { + if (cdns_is_holdquirk(id, updatetx)) { /* wait while fifo is full */ while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) != (id->curr_recv_count - CDNS_I2C_FIFO_DEPTH)) @@ -478,22 +474,6 @@ static irqreturn_t cdns_i2c_master_isr(void *ptr) CDNS_I2C_XFER_SIZE_OFFSET); id->curr_recv_count = id->recv_count; } - } else if (id->recv_count && !hold_quirk && - !id->curr_recv_count) { - - /* Set the slave address in address register*/ - cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK, - CDNS_I2C_ADDR_OFFSET); - - if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) { - cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE, - CDNS_I2C_XFER_SIZE_OFFSET); - id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE; - } else { - cdns_i2c_writereg(id->recv_count, - CDNS_I2C_XFER_SIZE_OFFSET); - id->curr_recv_count = id->recv_count; - } } /* Clear hold (if not repeated start) and signal completion */ diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index e9e2db68b9fb62f1bc3e47bf0770827667cc5063..78fb1a4274a6c3e04d4e697f8180179e0f616be3 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -66,7 +66,7 @@ /* IMX I2C registers: * the I2C register offset is different between SoCs, - * to provid support for all these chips, split the + * to provide support for all these chips, split the * register offset into a fixed base address and a * variable shift value, then the full register offset * will be calculated by diff --git a/drivers/i2c/busses/i2c-mlxcpld.c b/drivers/i2c/busses/i2c-mlxcpld.c index 56aa424fd71d5ce452885829fedf218a7728c6c8..815cc561386b021b9556f15e3686fa8d29c85cdf 100644 --- a/drivers/i2c/busses/i2c-mlxcpld.c +++ b/drivers/i2c/busses/i2c-mlxcpld.c @@ -49,7 +49,7 @@ #define MLXCPLD_LPCI2C_NACK_IND 2 #define MLXCPLD_I2C_FREQ_1000KHZ_SET 0x04 -#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0c +#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0e #define MLXCPLD_I2C_FREQ_100KHZ_SET 0x42 enum mlxcpld_i2c_frequency { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 20e53b167f81f5d4dd03507edb9eb93bf3ed7411..c8539d0e12dd76841e1ffe364d30c1ea65d9ddad 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7304,7 +7304,9 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; conf->mddev = mddev; - if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) + ret = -ENOMEM; + conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!conf->stripe_hashtbl) goto abort; /* We init hash_locks[0] separately to that it can be used diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index 2e0aa74ac18503becc7cac6fa6db59b853bd6802..95ef971b5e1cb4ce836fa0d2286ee2d81e28d8ce 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -13,10 +13,13 @@ lkdtm-$(CONFIG_LKDTM) += cfi.o lkdtm-$(CONFIG_LKDTM) += fortify.o lkdtm-$(CONFIG_PPC_64S_HASH_MMU) += powerpc.o -KASAN_SANITIZE_rodata.o := n KASAN_SANITIZE_stackleak.o := n -KCOV_INSTRUMENT_rodata.o := n -CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO) + +KASAN_SANITIZE_rodata.o := n +KCSAN_SANITIZE_rodata.o := n +KCOV_INSTRUMENT_rodata.o := n +OBJECT_FILES_NON_STANDARD_rodata.o := y +CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO) $(RETHUNK_CFLAGS) OBJCOPYFLAGS := OBJCOPYFLAGS_rodata_objcopy.o := \ diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 86e867ffbb10a0e2691684ace11d1912f1fb93f7..033be559a730946882a6f954d64b89f7508b8e80 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -1298,8 +1298,9 @@ static int sdhci_omap_probe(struct platform_device *pdev) /* * omap_device_pm_domain has callbacks to enable the main * functional clock, interface clock and also configure the - * SYSCONFIG register of omap devices. The callback will be invoked - * as part of pm_runtime_get_sync. + * SYSCONFIG register to clear any boot loader set voltage + * capabilities before calling sdhci_setup_host(). The + * callback will be invoked as part of pm_runtime_get_sync. */ pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, 50); @@ -1441,7 +1442,8 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); - sdhci_runtime_suspend_host(host); + if (omap_host->con != -EINVAL) + sdhci_runtime_suspend_host(host); sdhci_omap_context_save(omap_host); @@ -1458,10 +1460,10 @@ static int __maybe_unused sdhci_omap_runtime_resume(struct device *dev) pinctrl_pm_select_default_state(dev); - if (omap_host->con != -EINVAL) + if (omap_host->con != -EINVAL) { sdhci_omap_context_restore(omap_host); - - sdhci_runtime_resume_host(host, 0); + sdhci_runtime_resume_host(host, 0); + } return 0; } diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 889e403299568151a7e8417cbdc7a12c47dbed60..93da23682d862c3ea2c0407ee332cc165fd54304 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -850,9 +850,10 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this, unsigned int tRP_ps; bool use_half_period; int sample_delay_ps, sample_delay_factor; - u16 busy_timeout_cycles; + unsigned int busy_timeout_cycles; u8 wrn_dly_sel; unsigned long clk_rate, min_rate; + u64 busy_timeout_ps; if (sdr->tRC_min >= 30000) { /* ONFI non-EDO modes [0-3] */ @@ -885,7 +886,8 @@ static int gpmi_nfc_compute_timings(struct gpmi_nand_data *this, addr_setup_cycles = TO_CYCLES(sdr->tALS_min, period_ps); data_setup_cycles = TO_CYCLES(sdr->tDS_min, period_ps); data_hold_cycles = TO_CYCLES(sdr->tDH_min, period_ps); - busy_timeout_cycles = TO_CYCLES(sdr->tWB_max + sdr->tR_max, period_ps); + busy_timeout_ps = max(sdr->tBERS_max, sdr->tPROG_max); + busy_timeout_cycles = TO_CYCLES(busy_timeout_ps, period_ps); hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) | BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) | diff --git a/drivers/net/amt.c b/drivers/net/amt.c index be2719a3ba702b634feb190cebeebe288b8adc56..e019526e1df67527803363da684448d7f55ddae8 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -563,7 +563,7 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt) ihv3->nsrcs = 0; ihv3->resv = 0; ihv3->suppress = false; - ihv3->qrv = amt->net->ipv4.sysctl_igmp_qrv; + ihv3->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv); ihv3->csum = 0; csum = &ihv3->csum; csum_start = (void *)ihv3; @@ -577,14 +577,14 @@ static struct sk_buff *amt_build_igmp_gq(struct amt_dev *amt) return skb; } -static void __amt_update_gw_status(struct amt_dev *amt, enum amt_status status, - bool validate) +static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status, + bool validate) { if (validate && amt->status >= status) return; netdev_dbg(amt->dev, "Update GW status %s -> %s", status_str[amt->status], status_str[status]); - amt->status = status; + WRITE_ONCE(amt->status, status); } static void __amt_update_relay_status(struct amt_tunnel_list *tunnel, @@ -600,14 +600,6 @@ static void __amt_update_relay_status(struct amt_tunnel_list *tunnel, tunnel->status = status; } -static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status, - bool validate) -{ - spin_lock_bh(&amt->lock); - __amt_update_gw_status(amt, status, validate); - spin_unlock_bh(&amt->lock); -} - static void amt_update_relay_status(struct amt_tunnel_list *tunnel, enum amt_status status, bool validate) { @@ -700,9 +692,7 @@ static void amt_send_discovery(struct amt_dev *amt) if (unlikely(net_xmit_eval(err))) amt->dev->stats.tx_errors++; - spin_lock_bh(&amt->lock); - __amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true); - spin_unlock_bh(&amt->lock); + amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true); out: rcu_read_unlock(); } @@ -900,6 +890,28 @@ static void amt_send_mld_gq(struct amt_dev *amt, struct amt_tunnel_list *tunnel) } #endif +static bool amt_queue_event(struct amt_dev *amt, enum amt_event event, + struct sk_buff *skb) +{ + int index; + + spin_lock_bh(&amt->lock); + if (amt->nr_events >= AMT_MAX_EVENTS) { + spin_unlock_bh(&amt->lock); + return 1; + } + + index = (amt->event_idx + amt->nr_events) % AMT_MAX_EVENTS; + amt->events[index].event = event; + amt->events[index].skb = skb; + amt->nr_events++; + amt->event_idx %= AMT_MAX_EVENTS; + queue_work(amt_wq, &amt->event_wq); + spin_unlock_bh(&amt->lock); + + return 0; +} + static void amt_secret_work(struct work_struct *work) { struct amt_dev *amt = container_of(to_delayed_work(work), @@ -913,58 +925,72 @@ static void amt_secret_work(struct work_struct *work) msecs_to_jiffies(AMT_SECRET_TIMEOUT)); } -static void amt_discovery_work(struct work_struct *work) +static void amt_event_send_discovery(struct amt_dev *amt) { - struct amt_dev *amt = container_of(to_delayed_work(work), - struct amt_dev, - discovery_wq); - - spin_lock_bh(&amt->lock); if (amt->status > AMT_STATUS_SENT_DISCOVERY) goto out; get_random_bytes(&amt->nonce, sizeof(__be32)); - spin_unlock_bh(&amt->lock); amt_send_discovery(amt); - spin_lock_bh(&amt->lock); out: mod_delayed_work(amt_wq, &amt->discovery_wq, msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT)); - spin_unlock_bh(&amt->lock); } -static void amt_req_work(struct work_struct *work) +static void amt_discovery_work(struct work_struct *work) { struct amt_dev *amt = container_of(to_delayed_work(work), struct amt_dev, - req_wq); + discovery_wq); + + if (amt_queue_event(amt, AMT_EVENT_SEND_DISCOVERY, NULL)) + mod_delayed_work(amt_wq, &amt->discovery_wq, + msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT)); +} + +static void amt_event_send_request(struct amt_dev *amt) +{ u32 exp; - spin_lock_bh(&amt->lock); if (amt->status < AMT_STATUS_RECEIVED_ADVERTISEMENT) goto out; if (amt->req_cnt > AMT_MAX_REQ_COUNT) { netdev_dbg(amt->dev, "Gateway is not ready"); amt->qi = AMT_INIT_REQ_TIMEOUT; - amt->ready4 = false; - amt->ready6 = false; + WRITE_ONCE(amt->ready4, false); + WRITE_ONCE(amt->ready6, false); amt->remote_ip = 0; - __amt_update_gw_status(amt, AMT_STATUS_INIT, false); + amt_update_gw_status(amt, AMT_STATUS_INIT, false); amt->req_cnt = 0; + amt->nonce = 0; goto out; } - spin_unlock_bh(&amt->lock); + + if (!amt->req_cnt) { + WRITE_ONCE(amt->ready4, false); + WRITE_ONCE(amt->ready6, false); + get_random_bytes(&amt->nonce, sizeof(__be32)); + } amt_send_request(amt, false); amt_send_request(amt, true); - spin_lock_bh(&amt->lock); - __amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true); + amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true); amt->req_cnt++; out: exp = min_t(u32, (1 * (1 << amt->req_cnt)), AMT_MAX_REQ_TIMEOUT); mod_delayed_work(amt_wq, &amt->req_wq, msecs_to_jiffies(exp * 1000)); - spin_unlock_bh(&amt->lock); +} + +static void amt_req_work(struct work_struct *work) +{ + struct amt_dev *amt = container_of(to_delayed_work(work), + struct amt_dev, + req_wq); + + if (amt_queue_event(amt, AMT_EVENT_SEND_REQUEST, NULL)) + mod_delayed_work(amt_wq, &amt->req_wq, + msecs_to_jiffies(100)); } static bool amt_send_membership_update(struct amt_dev *amt, @@ -1220,7 +1246,8 @@ static netdev_tx_t amt_dev_xmit(struct sk_buff *skb, struct net_device *dev) /* Gateway only passes IGMP/MLD packets */ if (!report) goto free; - if ((!v6 && !amt->ready4) || (v6 && !amt->ready6)) + if ((!v6 && !READ_ONCE(amt->ready4)) || + (v6 && !READ_ONCE(amt->ready6))) goto free; if (amt_send_membership_update(amt, skb, v6)) goto free; @@ -2236,6 +2263,10 @@ static bool amt_advertisement_handler(struct amt_dev *amt, struct sk_buff *skb) ipv4_is_zeronet(amta->ip4)) return true; + if (amt->status != AMT_STATUS_SENT_DISCOVERY || + amt->nonce != amta->nonce) + return true; + amt->remote_ip = amta->ip4; netdev_dbg(amt->dev, "advertised remote ip = %pI4\n", &amt->remote_ip); mod_delayed_work(amt_wq, &amt->req_wq, 0); @@ -2251,6 +2282,9 @@ static bool amt_multicast_data_handler(struct amt_dev *amt, struct sk_buff *skb) struct ethhdr *eth; struct iphdr *iph; + if (READ_ONCE(amt->status) != AMT_STATUS_SENT_UPDATE) + return true; + hdr_size = sizeof(*amtmd) + sizeof(struct udphdr); if (!pskb_may_pull(skb, hdr_size)) return true; @@ -2325,6 +2359,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt, if (amtmq->reserved || amtmq->version) return true; + if (amtmq->nonce != amt->nonce) + return true; + hdr_size -= sizeof(*eth); if (iptunnel_pull_header(skb, hdr_size, htons(ETH_P_TEB), false)) return true; @@ -2339,6 +2376,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt, iph = ip_hdr(skb); if (iph->version == 4) { + if (READ_ONCE(amt->ready4)) + return true; + if (!pskb_may_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS + sizeof(*ihv3))) return true; @@ -2349,12 +2389,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt, ihv3 = skb_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS); skb_reset_transport_header(skb); skb_push(skb, sizeof(*iph) + AMT_IPHDR_OPTS); - spin_lock_bh(&amt->lock); - amt->ready4 = true; + WRITE_ONCE(amt->ready4, true); amt->mac = amtmq->response_mac; amt->req_cnt = 0; amt->qi = ihv3->qqic; - spin_unlock_bh(&amt->lock); skb->protocol = htons(ETH_P_IP); eth->h_proto = htons(ETH_P_IP); ip_eth_mc_map(iph->daddr, eth->h_dest); @@ -2363,6 +2401,9 @@ static bool amt_membership_query_handler(struct amt_dev *amt, struct mld2_query *mld2q; struct ipv6hdr *ip6h; + if (READ_ONCE(amt->ready6)) + return true; + if (!pskb_may_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS + sizeof(*mld2q))) return true; @@ -2374,12 +2415,10 @@ static bool amt_membership_query_handler(struct amt_dev *amt, mld2q = skb_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS); skb_reset_transport_header(skb); skb_push(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS); - spin_lock_bh(&amt->lock); - amt->ready6 = true; + WRITE_ONCE(amt->ready6, true); amt->mac = amtmq->response_mac; amt->req_cnt = 0; amt->qi = mld2q->mld2q_qqic; - spin_unlock_bh(&amt->lock); skb->protocol = htons(ETH_P_IPV6); eth->h_proto = htons(ETH_P_IPV6); ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); @@ -2392,12 +2431,14 @@ static bool amt_membership_query_handler(struct amt_dev *amt, skb->pkt_type = PACKET_MULTICAST; skb->ip_summed = CHECKSUM_NONE; len = skb->len; + local_bh_disable(); if (__netif_rx(skb) == NET_RX_SUCCESS) { amt_update_gw_status(amt, AMT_STATUS_RECEIVED_QUERY, true); dev_sw_netstats_rx_add(amt->dev, len); } else { amt->dev->stats.rx_dropped++; } + local_bh_enable(); return false; } @@ -2638,7 +2679,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb) if (tunnel->ip4 == iph->saddr) goto send; + spin_lock_bh(&amt->lock); if (amt->nr_tunnels >= amt->max_tunnels) { + spin_unlock_bh(&amt->lock); icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); return true; } @@ -2646,8 +2689,10 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb) tunnel = kzalloc(sizeof(*tunnel) + (sizeof(struct hlist_head) * amt->hash_buckets), GFP_ATOMIC); - if (!tunnel) + if (!tunnel) { + spin_unlock_bh(&amt->lock); return true; + } tunnel->source_port = udph->source; tunnel->ip4 = iph->saddr; @@ -2660,10 +2705,9 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb) INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire); - spin_lock_bh(&amt->lock); list_add_tail_rcu(&tunnel->list, &amt->tunnel_list); tunnel->key = amt->key; - amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true); + __amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true); amt->nr_tunnels++; mod_delayed_work(amt_wq, &tunnel->gc_wq, msecs_to_jiffies(amt_gmi(amt))); @@ -2688,6 +2732,38 @@ static bool amt_request_handler(struct amt_dev *amt, struct sk_buff *skb) return false; } +static void amt_gw_rcv(struct amt_dev *amt, struct sk_buff *skb) +{ + int type = amt_parse_type(skb); + int err = 1; + + if (type == -1) + goto drop; + + if (amt->mode == AMT_MODE_GATEWAY) { + switch (type) { + case AMT_MSG_ADVERTISEMENT: + err = amt_advertisement_handler(amt, skb); + break; + case AMT_MSG_MEMBERSHIP_QUERY: + err = amt_membership_query_handler(amt, skb); + if (!err) + return; + break; + default: + netdev_dbg(amt->dev, "Invalid type of Gateway\n"); + break; + } + } +drop: + if (err) { + amt->dev->stats.rx_dropped++; + kfree_skb(skb); + } else { + consume_skb(skb); + } +} + static int amt_rcv(struct sock *sk, struct sk_buff *skb) { struct amt_dev *amt; @@ -2719,8 +2795,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb) err = true; goto drop; } - err = amt_advertisement_handler(amt, skb); - break; + if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) { + netdev_dbg(amt->dev, "AMT Event queue full\n"); + err = true; + goto drop; + } + goto out; case AMT_MSG_MULTICAST_DATA: if (iph->saddr != amt->remote_ip) { netdev_dbg(amt->dev, "Invalid Relay IP\n"); @@ -2738,11 +2818,12 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb) err = true; goto drop; } - err = amt_membership_query_handler(amt, skb); - if (err) + if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) { + netdev_dbg(amt->dev, "AMT Event queue full\n"); + err = true; goto drop; - else - goto out; + } + goto out; default: err = true; netdev_dbg(amt->dev, "Invalid type of Gateway\n"); @@ -2780,6 +2861,46 @@ static int amt_rcv(struct sock *sk, struct sk_buff *skb) return 0; } +static void amt_event_work(struct work_struct *work) +{ + struct amt_dev *amt = container_of(work, struct amt_dev, event_wq); + struct sk_buff *skb; + u8 event; + int i; + + for (i = 0; i < AMT_MAX_EVENTS; i++) { + spin_lock_bh(&amt->lock); + if (amt->nr_events == 0) { + spin_unlock_bh(&amt->lock); + return; + } + event = amt->events[amt->event_idx].event; + skb = amt->events[amt->event_idx].skb; + amt->events[amt->event_idx].event = AMT_EVENT_NONE; + amt->events[amt->event_idx].skb = NULL; + amt->nr_events--; + amt->event_idx++; + amt->event_idx %= AMT_MAX_EVENTS; + spin_unlock_bh(&amt->lock); + + switch (event) { + case AMT_EVENT_RECEIVE: + amt_gw_rcv(amt, skb); + break; + case AMT_EVENT_SEND_DISCOVERY: + amt_event_send_discovery(amt); + break; + case AMT_EVENT_SEND_REQUEST: + amt_event_send_request(amt); + break; + default: + if (skb) + kfree_skb(skb); + break; + } + } +} + static int amt_err_lookup(struct sock *sk, struct sk_buff *skb) { struct amt_dev *amt; @@ -2804,7 +2925,7 @@ static int amt_err_lookup(struct sock *sk, struct sk_buff *skb) break; case AMT_MSG_REQUEST: case AMT_MSG_MEMBERSHIP_UPDATE: - if (amt->status >= AMT_STATUS_RECEIVED_ADVERTISEMENT) + if (READ_ONCE(amt->status) >= AMT_STATUS_RECEIVED_ADVERTISEMENT) mod_delayed_work(amt_wq, &amt->req_wq, 0); break; default: @@ -2867,6 +2988,8 @@ static int amt_dev_open(struct net_device *dev) amt->ready4 = false; amt->ready6 = false; + amt->event_idx = 0; + amt->nr_events = 0; err = amt_socket_create(amt); if (err) @@ -2874,6 +2997,7 @@ static int amt_dev_open(struct net_device *dev) amt->req_cnt = 0; amt->remote_ip = 0; + amt->nonce = 0; get_random_bytes(&amt->key, sizeof(siphash_key_t)); amt->status = AMT_STATUS_INIT; @@ -2892,6 +3016,8 @@ static int amt_dev_stop(struct net_device *dev) struct amt_dev *amt = netdev_priv(dev); struct amt_tunnel_list *tunnel, *tmp; struct socket *sock; + struct sk_buff *skb; + int i; cancel_delayed_work_sync(&amt->req_wq); cancel_delayed_work_sync(&amt->discovery_wq); @@ -2904,6 +3030,15 @@ static int amt_dev_stop(struct net_device *dev) if (sock) udp_tunnel_sock_release(sock); + cancel_work_sync(&amt->event_wq); + for (i = 0; i < AMT_MAX_EVENTS; i++) { + skb = amt->events[i].skb; + if (skb) + kfree_skb(skb); + amt->events[i].event = AMT_EVENT_NONE; + amt->events[i].skb = NULL; + } + amt->ready4 = false; amt->ready6 = false; amt->req_cnt = 0; @@ -3095,7 +3230,7 @@ static int amt_newlink(struct net *net, struct net_device *dev, goto err; } if (amt->mode == AMT_MODE_RELAY) { - amt->qrv = amt->net->ipv4.sysctl_igmp_qrv; + amt->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv); amt->qri = 10; dev->needed_headroom = amt->stream_dev->needed_headroom + AMT_RELAY_HLEN; @@ -3146,8 +3281,8 @@ static int amt_newlink(struct net *net, struct net_device *dev, INIT_DELAYED_WORK(&amt->discovery_wq, amt_discovery_work); INIT_DELAYED_WORK(&amt->req_wq, amt_req_work); INIT_DELAYED_WORK(&amt->secret_wq, amt_secret_work); + INIT_WORK(&amt->event_wq, amt_event_work); INIT_LIST_HEAD(&amt->tunnel_list); - return 0; err: dev_put(amt->stream_dev); @@ -3280,7 +3415,7 @@ static int __init amt_init(void) if (err < 0) goto unregister_notifier; - amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 1); + amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 0); if (!amt_wq) { err = -ENOMEM; goto rtnl_unregister; diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index ba42cef10a532df6fe0c4ecf85384d257f9c1c8d..cb0321ea853c107913499e0c547d9d15768d699d 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1843,6 +1843,7 @@ static int rcar_canfd_probe(struct platform_device *pdev) of_child = of_get_child_by_name(pdev->dev.of_node, name); if (of_child && of_device_is_available(of_child)) channels_mask |= BIT(i); + of_node_put(of_child); } if (chip_id != RENESAS_RZG2L) { diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 9b47b07162fe5292aca355aaf45ba008189be6a0..bc6518504fd463ccadb9d2014109838ffe94a24b 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1690,8 +1690,8 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv) u32 osc; int err; - /* The OSC_LPMEN is only supported on MCP2518FD, so use it to - * autodetect the model. + /* The OSC_LPMEN is only supported on MCP2518FD and MCP251863, + * so use it to autodetect the model. */ err = regmap_update_bits(priv->map_reg, MCP251XFD_REG_OSC, MCP251XFD_REG_OSC_LPMEN, @@ -1703,10 +1703,18 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv) if (err) return err; - if (osc & MCP251XFD_REG_OSC_LPMEN) - devtype_data = &mcp251xfd_devtype_data_mcp2518fd; - else + if (osc & MCP251XFD_REG_OSC_LPMEN) { + /* We cannot distinguish between MCP2518FD and + * MCP251863. If firmware specifies MCP251863, keep + * it, otherwise set to MCP2518FD. + */ + if (mcp251xfd_is_251863(priv)) + devtype_data = &mcp251xfd_devtype_data_mcp251863; + else + devtype_data = &mcp251xfd_devtype_data_mcp2518fd; + } else { devtype_data = &mcp251xfd_devtype_data_mcp2517fd; + } if (!mcp251xfd_is_251XFD(priv) && priv->devtype_data.model != devtype_data->model) { diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 9ca8c8d7740f4d8da599be17cb84cd620af33fb3..92a500e1ccd216162ef5affc7d138b3999c01403 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1038,18 +1038,21 @@ int ksz_switch_register(struct ksz_device *dev, ports = of_get_child_by_name(dev->dev->of_node, "ethernet-ports"); if (!ports) ports = of_get_child_by_name(dev->dev->of_node, "ports"); - if (ports) + if (ports) { for_each_available_child_of_node(ports, port) { if (of_property_read_u32(port, "reg", &port_num)) continue; if (!(dev->port_mask & BIT(port_num))) { of_node_put(port); + of_node_put(ports); return -EINVAL; } of_get_phy_mode(port, &dev->ports[port_num].interface); } + of_node_put(ports); + } dev->synclko_125 = of_property_read_bool(dev->dev->of_node, "microchip,synclko-125"); dev->synclko_disable = of_property_read_bool(dev->dev->of_node, diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 72b6fc1932b5445518f3b2fc3907e1f7ce1baa41..698c7d1fb45cc97d4ef08c2b61b1f50f202d99b0 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3382,12 +3382,28 @@ static const struct of_device_id sja1105_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, sja1105_dt_ids); +static const struct spi_device_id sja1105_spi_ids[] = { + { "sja1105e" }, + { "sja1105t" }, + { "sja1105p" }, + { "sja1105q" }, + { "sja1105r" }, + { "sja1105s" }, + { "sja1110a" }, + { "sja1110b" }, + { "sja1110c" }, + { "sja1110d" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, sja1105_spi_ids); + static struct spi_driver sja1105_driver = { .driver = { .name = "sja1105", .owner = THIS_MODULE, .of_match_table = of_match_ptr(sja1105_dt_ids), }, + .id_table = sja1105_spi_ids, .probe = sja1105_probe, .remove = sja1105_remove, .shutdown = sja1105_shutdown, diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c index 3110895358d8dcc31715228acbe42e183c37c4fd..97a92e6da60d8b95a9209b71461dd90233c84846 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-spi.c +++ b/drivers/net/dsa/vitesse-vsc73xx-spi.c @@ -205,10 +205,20 @@ static const struct of_device_id vsc73xx_of_match[] = { }; MODULE_DEVICE_TABLE(of, vsc73xx_of_match); +static const struct spi_device_id vsc73xx_spi_ids[] = { + { "vsc7385" }, + { "vsc7388" }, + { "vsc7395" }, + { "vsc7398" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, vsc73xx_spi_ids); + static struct spi_driver vsc73xx_spi_driver = { .probe = vsc73xx_spi_probe, .remove = vsc73xx_spi_remove, .shutdown = vsc73xx_spi_shutdown, + .id_table = vsc73xx_spi_ids, .driver = { .name = "vsc73xx-spi", .of_match_table = vsc73xx_of_match, diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 7c760aa655404ab45a67a434f23bada66281253f..ddfe9208529a5ebebbc168a7aa01919acf15c947 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1236,8 +1236,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk, csk->sndbuf = newsk->sk_sndbuf; csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx; RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk), - sock_net(newsk)-> - ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(newsk)-> + ipv4.sysctl_tcp_window_scaling), tp->window_clamp); neigh_release(n); inet_inherit_port(&tcp_hashinfo, lsk, newsk); @@ -1384,7 +1384,7 @@ static void chtls_pass_accept_request(struct sock *sk, #endif } if (req->tcpopt.wsf <= 14 && - sock_net(sk)->ipv4.sysctl_tcp_window_scaling) { + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { inet_rsk(oreq)->wscale_ok = 1; inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf; } diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 528eb0f223b176309d058a2d8528f70dc096785e..b4f5e57d0285cb516a26e6d1b7dd54bd9fee0ae7 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2287,7 +2287,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) /* Uses sync mcc */ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, - u8 page_num, u8 *data) + u8 page_num, u32 off, u32 len, u8 *data) { struct be_dma_mem cmd; struct be_mcc_wrb *wrb; @@ -2321,10 +2321,10 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, req->port = cpu_to_le32(adapter->hba_port_num); req->page_num = cpu_to_le32(page_num); status = be_mcc_notify_wait(adapter); - if (!status) { + if (!status && len > 0) { struct be_cmd_resp_port_type *resp = cmd.va; - memcpy(data, resp->page_data, PAGE_DATA_LEN); + memcpy(data, resp->page_data + off, len); } err: mutex_unlock(&adapter->mcc_lock); @@ -2415,7 +2415,7 @@ int be_cmd_query_cable_type(struct be_adapter *adapter) int status; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { switch (adapter->phy.interface_type) { case PHY_TYPE_QSFP: @@ -2440,7 +2440,7 @@ int be_cmd_query_sfp_info(struct be_adapter *adapter) int status; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { strlcpy(adapter->phy.vendor_name, page_data + SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index db1f3b908582efe3c3a7504522325f0e2b781824..e2085c68c0ee784e8bd6f5ef41f478eb5f789a52 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2427,7 +2427,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon, int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state); int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, - u8 page_num, u8 *data); + u8 page_num, u32 off, u32 len, u8 *data); int be_cmd_query_cable_type(struct be_adapter *adapter); int be_cmd_query_sfp_info(struct be_adapter *adapter); int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index dfa784339781d3b8785ee6d38db6069b47cf5750..bd0df189d87192f704bb12e63c66618dfa5e800a 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1344,7 +1344,7 @@ static int be_get_module_info(struct net_device *netdev, return -EOPNOTSUPP; status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - page_data); + 0, PAGE_DATA_LEN, page_data); if (!status) { if (!page_data[SFP_PLUS_SFF_8472_COMP]) { modinfo->type = ETH_MODULE_SFF_8079; @@ -1362,25 +1362,32 @@ static int be_get_module_eeprom(struct net_device *netdev, { struct be_adapter *adapter = netdev_priv(netdev); int status; + u32 begin, end; if (!check_privilege(adapter, MAX_PRIVILEGES)) return -EOPNOTSUPP; - status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, - data); - if (status) - goto err; + begin = eeprom->offset; + end = eeprom->offset + eeprom->len; + + if (begin < PAGE_DATA_LEN) { + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin, + min_t(u32, end, PAGE_DATA_LEN) - begin, + data); + if (status) + goto err; + + data += PAGE_DATA_LEN - begin; + begin = PAGE_DATA_LEN; + } - if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) { - status = be_cmd_read_port_transceiver_data(adapter, - TR_PAGE_A2, - data + - PAGE_DATA_LEN); + if (end > PAGE_DATA_LEN) { + status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2, + begin - PAGE_DATA_LEN, + end - begin, data); if (status) goto err; } - if (eeprom->offset) - memcpy(data, data + eeprom->offset, eeprom->len); err: return be_cmd_status(status); } diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 13382df2f2eff4577128430ff55882a7434280d7..bcf680e838113ddfd9d73a712fb94eac772ea343 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -630,7 +630,6 @@ struct e1000_phy_info { bool disable_polarity_correction; bool is_mdix; bool polarity_correction; - bool reset_disable; bool speed_downgraded; bool autoneg_wait_to_complete; }; diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index e6c8e6d5234f81d000ce63258a8b31df5d6d9d98..9466f65a6da774010c42b2e45c5190fee1f958c4 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -2050,10 +2050,6 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) bool blocked = false; int i = 0; - /* Check the PHY (LCD) reset flag */ - if (hw->phy.reset_disable) - return true; - while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) && (i++ < 30)) usleep_range(10000, 11000); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 638a3ddd7ada8b3d9bae93e4c0c9e99527ef4cac..2504b11c3169fa6886403b163bb8705729ffda56 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -271,7 +271,6 @@ #define I217_CGFREG_ENABLE_MTA_RESET 0x0002 #define I217_MEMPWR PHY_REG(772, 26) #define I217_MEMPWR_DISABLE_SMB_RELEASE 0x0010 -#define I217_MEMPWR_MOEM 0x1000 /* Receive Address Initial CRC Calculation */ #define E1000_PCH_RAICC(_n) (0x05F50 + ((_n) * 4)) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index fa06f68c8c8038c5355ff605f3854703259a739b..f1729940e46cef9c8a655c64b754f89559f4177a 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6494,6 +6494,10 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && hw->mac.type >= e1000_pch_adp) { + /* Keep the GPT clock enabled for CSME */ + mac_data = er32(FEXTNVM); + mac_data |= BIT(3); + ew32(FEXTNVM, mac_data); /* Request ME unconfigure the device from S0ix */ mac_data = er32(H2ME); mac_data &= ~E1000_H2ME_START_DPG; @@ -6987,21 +6991,8 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; - u16 phy_data; int rc; - if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && - hw->mac.type >= e1000_pch_adp) { - /* Mask OEM Bits / Gig Disable / Restart AN (772_26[12] = 1) */ - e1e_rphy(hw, I217_MEMPWR, &phy_data); - phy_data |= I217_MEMPWR_MOEM; - e1e_wphy(hw, I217_MEMPWR, phy_data); - - /* Disable LCD reset */ - hw->phy.reset_disable = true; - } - e1000e_flush_lpic(pdev); e1000e_pm_freeze(dev); @@ -7023,8 +7014,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; - u16 phy_data; int rc; /* Introduce S0ix implementation */ @@ -7035,17 +7024,6 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) if (rc) return rc; - if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && - hw->mac.type >= e1000_pch_adp) { - /* Unmask OEM Bits / Gig Disable / Restart AN 772_26[12] = 0 */ - e1e_rphy(hw, I217_MEMPWR, &phy_data); - phy_data &= ~I217_MEMPWR_MOEM; - e1e_wphy(hw, I217_MEMPWR, phy_data); - - /* Enable LCD reset */ - hw->phy.reset_disable = false; - } - return e1000e_pm_thaw(dev); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index aa786fd5595113c521c6f1b500ed94b4f0d80dd1..7f1a0d90dc51eabbfef366527bdea7cfb70245f5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10650,7 +10650,7 @@ static int i40e_reset(struct i40e_pf *pf) **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { - int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state); + const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf); struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; i40e_status ret; @@ -10658,13 +10658,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) int v; if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && - i40e_check_recovery_mode(pf)) { + is_recovery_mode_reported) i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev); - } if (test_bit(__I40E_DOWN, pf->state) && - !test_bit(__I40E_RECOVERY_MODE, pf->state) && - !old_recovery_mode_bit) + !test_bit(__I40E_RECOVERY_MODE, pf->state)) goto clear_recovery; dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); @@ -10691,13 +10689,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * accordingly with regard to resources initialization * and deinitialization */ - if (test_bit(__I40E_RECOVERY_MODE, pf->state) || - old_recovery_mode_bit) { + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { if (i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities)) goto end_unlock; - if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { + if (is_recovery_mode_reported) { /* we're staying in recovery mode so we'll reinitialize * misc vector here */ diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 49aed3e506a66d3bee4cb9f563e1fc74fddde03a..0ea0361cd86b1f98c21291051c8c52cff9576295 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -64,7 +64,6 @@ struct iavf_vsi { u16 id; DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__); int base_vector; - u16 work_limit; u16 qs_handle; void *priv; /* client driver data reference. */ }; @@ -159,8 +158,12 @@ struct iavf_vlan { struct iavf_vlan_filter { struct list_head list; struct iavf_vlan vlan; - bool remove; /* filter needs to be removed */ - bool add; /* filter needs to be added */ + struct { + u8 is_new_vlan:1; /* filter is new, wait for PF answer */ + u8 remove:1; /* filter needs to be removed */ + u8 add:1; /* filter needs to be added */ + u8 padding:5; + }; }; #define IAVF_MAX_TRAFFIC_CLASS 4 @@ -461,6 +464,10 @@ static inline const char *iavf_state_str(enum iavf_state_t state) return "__IAVF_INIT_VERSION_CHECK"; case __IAVF_INIT_GET_RESOURCES: return "__IAVF_INIT_GET_RESOURCES"; + case __IAVF_INIT_EXTENDED_CAPS: + return "__IAVF_INIT_EXTENDED_CAPS"; + case __IAVF_INIT_CONFIG_ADAPTER: + return "__IAVF_INIT_CONFIG_ADAPTER"; case __IAVF_INIT_SW: return "__IAVF_INIT_SW"; case __IAVF_INIT_FAILED: @@ -520,6 +527,7 @@ int iavf_get_vf_config(struct iavf_adapter *adapter); int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter); int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter); void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter); +u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter); void iavf_irq_enable(struct iavf_adapter *adapter, bool flush); void iavf_configure_queues(struct iavf_adapter *adapter); void iavf_deconfigure_queues(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 3bb56714beb034e2c8c483d8970a0e8634b9e297..e535d4c3da49d92e80214dc7ca39b4f75f311635 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -692,12 +692,8 @@ static int __iavf_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) { struct iavf_adapter *adapter = netdev_priv(netdev); - struct iavf_vsi *vsi = &adapter->vsi; struct iavf_ring *rx_ring, *tx_ring; - ec->tx_max_coalesced_frames = vsi->work_limit; - ec->rx_max_coalesced_frames = vsi->work_limit; - /* Rx and Tx usecs per queue value. If user doesn't specify the * queue, return queue 0's value to represent. */ @@ -825,12 +821,8 @@ static int __iavf_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) { struct iavf_adapter *adapter = netdev_priv(netdev); - struct iavf_vsi *vsi = &adapter->vsi; int i; - if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) - vsi->work_limit = ec->tx_max_coalesced_frames_irq; - if (ec->rx_coalesce_usecs == 0) { if (ec->use_adaptive_rx_coalesce) netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); @@ -1969,8 +1961,6 @@ static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir, static const struct ethtool_ops iavf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_MAX_FRAMES | - ETHTOOL_COALESCE_MAX_FRAMES_IRQ | ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = iavf_get_drvinfo, .get_link = ethtool_op_get_link, diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index f3ecb3bca33dde77895c7742861940c9675e9249..2e2c153ce46a3c791b0aedc73e61077478cf50c8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -843,7 +843,7 @@ static void iavf_restore_filters(struct iavf_adapter *adapter) * iavf_get_num_vlans_added - get number of VLANs added * @adapter: board private structure */ -static u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter) +u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter) { return bitmap_weight(adapter->vsi.active_cvlans, VLAN_N_VID) + bitmap_weight(adapter->vsi.active_svlans, VLAN_N_VID); @@ -906,11 +906,6 @@ static int iavf_vlan_rx_add_vid(struct net_device *netdev, if (!iavf_add_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto)))) return -ENOMEM; - if (proto == cpu_to_be16(ETH_P_8021Q)) - set_bit(vid, adapter->vsi.active_cvlans); - else - set_bit(vid, adapter->vsi.active_svlans); - return 0; } @@ -2245,7 +2240,6 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter) adapter->vsi.back = adapter; adapter->vsi.base_vector = 1; - adapter->vsi.work_limit = IAVF_DEFAULT_IRQ_WORK; vsi->netdev = adapter->netdev; vsi->qs_handle = adapter->vsi_res->qset_handle; if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) { @@ -2956,6 +2950,9 @@ static void iavf_reset_task(struct work_struct *work) adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER; iavf_misc_irq_enable(adapter); + bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID); + bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID); + mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2); /* We were running when the reset started, so we need to restore some diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 978f651c6b093b5940a87720cbf2cd29bec04e21..06d18797d25a28c4406f5f3247ab360c3ceb3ef8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -194,7 +194,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, struct iavf_tx_buffer *tx_buf; struct iavf_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; - unsigned int budget = vsi->work_limit; + unsigned int budget = IAVF_DEFAULT_IRQ_WORK; tx_buf = &tx_ring->tx_bi[i]; tx_desc = IAVF_TX_DESC(tx_ring, i); @@ -1285,11 +1285,10 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, { struct iavf_rx_buffer *rx_buffer; - if (!size) - return NULL; - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; prefetchw(rx_buffer->page); + if (!size) + return rx_buffer; /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 782450d5c12fc26a2c7e358093cc1791096c6b83..1603e99bae4af3737fa1b3195c443e7aee9edfd9 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -626,6 +626,33 @@ static void iavf_mac_add_reject(struct iavf_adapter *adapter) spin_unlock_bh(&adapter->mac_vlan_list_lock); } +/** + * iavf_vlan_add_reject + * @adapter: adapter structure + * + * Remove VLAN filters from list based on PF response. + **/ +static void iavf_vlan_add_reject(struct iavf_adapter *adapter) +{ + struct iavf_vlan_filter *f, *ftmp; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { + if (f->is_new_vlan) { + if (f->vlan.tpid == ETH_P_8021Q) + clear_bit(f->vlan.vid, + adapter->vsi.active_cvlans); + else + clear_bit(f->vlan.vid, + adapter->vsi.active_svlans); + + list_del(&f->list); + kfree(f); + } + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); +} + /** * iavf_add_vlans * @adapter: adapter structure @@ -683,6 +710,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) vvfl->vlan_id[i] = f->vlan.vid; i++; f->add = false; + f->is_new_vlan = true; if (i == count) break; } @@ -695,10 +723,18 @@ void iavf_add_vlans(struct iavf_adapter *adapter) iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len); kfree(vvfl); } else { + u16 max_vlans = adapter->vlan_v2_caps.filtering.max_filters; + u16 current_vlans = iavf_get_num_vlans_added(adapter); struct virtchnl_vlan_filter_list_v2 *vvfl_v2; adapter->current_op = VIRTCHNL_OP_ADD_VLAN_V2; + if ((count + current_vlans) > max_vlans && + current_vlans < max_vlans) { + count = max_vlans - iavf_get_num_vlans_added(adapter); + more = true; + } + len = sizeof(*vvfl_v2) + ((count - 1) * sizeof(struct virtchnl_vlan_filter)); if (len > IAVF_MAX_AQ_BUF_SIZE) { @@ -725,6 +761,9 @@ void iavf_add_vlans(struct iavf_adapter *adapter) &adapter->vlan_v2_caps.filtering.filtering_support; struct virtchnl_vlan *vlan; + if (i == count) + break; + /* give priority over outer if it's enabled */ if (filtering_support->outer) vlan = &vvfl_v2->filters[i].outer; @@ -736,8 +775,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter) i++; f->add = false; - if (i == count) - break; + f->is_new_vlan = true; } } @@ -2080,6 +2118,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, */ iavf_netdev_features_vlan_strip_set(netdev, true); break; + case VIRTCHNL_OP_ADD_VLAN_V2: + iavf_vlan_add_reject(adapter); + dev_warn(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n", + iavf_stat_str(&adapter->hw, v_retval)); + break; default: dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", v_retval, iavf_stat_str(&adapter->hw, v_retval), @@ -2332,6 +2375,24 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_unlock_bh(&adapter->adv_rss_lock); } break; + case VIRTCHNL_OP_ADD_VLAN_V2: { + struct iavf_vlan_filter *f; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry(f, &adapter->vlan_filter_list, list) { + if (f->is_new_vlan) { + f->is_new_vlan = false; + if (f->vlan.tpid == ETH_P_8021Q) + set_bit(f->vlan.vid, + adapter->vsi.active_cvlans); + else + set_bit(f->vlan.vid, + adapter->vsi.active_svlans); + } + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); + } + break; case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: /* PF enabled vlan strip on this VF. * Update netdev->features if needed to be in sync with ethtool. diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index ae17af44fe02502f7ad44dc428dc97e6b5dafe67..a5ebee7df4a8544c572dbe4d5f979571c183df03 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6171,6 +6171,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; + if (IGC_REMOVED(hw_addr)) + return ~value; + value = readl(&hw_addr[reg]); /* reads should not return all F's */ diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index e197a33d93a03099eeb55881dfea1293decbfd4f..026c3b65fc37aab1ff593a221bfbb9e0512fbf39 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -306,7 +306,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg); #define wr32(reg, val) \ do { \ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ - writel((val), &hw_addr[(reg)]); \ + if (!IGC_REMOVED(hw_addr)) \ + writel((val), &hw_addr[(reg)]); \ } while (0) #define rd32(reg) (igc_rd32(hw, reg)) @@ -318,4 +319,6 @@ do { \ #define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2))) +#define IGC_REMOVED(h) unlikely(!(h)) + #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 921a4d977d65137d4eb20bd97c6dfe9750122042..8813b4dd6872f5a0e4cae7ceeb1bfb768bdafe6b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -779,6 +779,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_IPSEC struct ixgbe_ipsec *ipsec; #endif /* CONFIG_IXGBE_IPSEC */ + spinlock_t vfs_lock; }; static inline int ixgbe_determine_xdp_q_idx(int cpu) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 77c2e70b0860d538a17942c6fe51ce8902c21a47..55f91c9ff0478ce744553c61dae54ef057a81d7a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6403,6 +6403,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, /* n-tuple support exists, always init our spinlock */ spin_lock_init(&adapter->fdir_perfect_lock); + /* init spinlock to avoid concurrency of VF resources */ + spin_lock_init(&adapter->vfs_lock); + #ifdef CONFIG_IXGBE_DCB ixgbe_init_dcb(adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index d4e63f0644c366598eb435872b1c3aed5ab1866d..a1e69c7348632d19bdd833b8c75f71232a8de7ee 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -205,10 +205,13 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs) int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { unsigned int num_vfs = adapter->num_vfs, vf; + unsigned long flags; int rss; + spin_lock_irqsave(&adapter->vfs_lock, flags); /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; + spin_unlock_irqrestore(&adapter->vfs_lock, flags); /* put the reference to all of the vf devices */ for (vf = 0; vf < num_vfs; ++vf) { @@ -1355,8 +1358,10 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf) void ixgbe_msg_task(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + unsigned long flags; u32 vf; + spin_lock_irqsave(&adapter->vfs_lock, flags); for (vf = 0; vf < adapter->num_vfs; vf++) { /* process any reset requests */ if (!ixgbe_check_for_rst(hw, vf)) @@ -1370,6 +1375,7 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter) if (!ixgbe_check_for_ack(hw, vf)) ixgbe_rcv_ack_from_vf(adapter, vf); } + spin_unlock_irqrestore(&adapter->vfs_lock, flags); } static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c index d43e503c644f85d496ff55761ca1b7e1d2633af5..4d93ad6a284c0318d9f4deb560c2ffa798377707 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c @@ -167,12 +167,12 @@ static int prestera_flower_parse_meta(struct prestera_acl_rule *rule, } port = netdev_priv(ingress_dev); - mask = htons(0x1FFF); - key = htons(port->hw_id); + mask = htons(0x1FFF << 3); + key = htons(port->hw_id << 3); rule_match_set(r_match->key, SYS_PORT, key); rule_match_set(r_match->mask, SYS_PORT, mask); - mask = htons(0x1FF); + mask = htons(0x3FF); key = htons(port->dev_id); rule_match_set(r_match->key, SYS_DEV, key); rule_match_set(r_match->mask, SYS_DEV, mask); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index 90e7dfd011c91f0cdb20b01d9d4cba7fc9730453..5d457bc9acc1cd12ece0cad7e92cf1de34c514ba 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -93,6 +93,9 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i }; struct net_device_path path = {}; + if (!ctx.dev) + return -ENODEV; + memcpy(ctx.daddr, addr, sizeof(ctx.daddr)); if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED)) diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 8f0cd3196aacffedf6f59957fdd131e7ddb6e305..29be2fcafea3bc3907fe5b90a8bdb6e7e3997578 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -651,7 +651,7 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs) * WDMA RX. */ - BUG_ON(idx > ARRAY_SIZE(dev->tx_ring)); + BUG_ON(idx >= ARRAY_SIZE(dev->tx_ring)); if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE)) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0d8a0068e4ca1c1b004ac752a9905554317cacb9..ce33dbde124d9dc4baedd125581da2d9d05bbcc6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5384,7 +5384,7 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, { const struct fib_nh *nh = fib_info_nh(fi, 0); - return nh->fib_nh_scope == RT_SCOPE_LINK || + return nh->fib_nh_gw_family || mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); } @@ -10324,7 +10324,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, unsigned long *fields = config->fields; u32 hash_fields; - switch (net->ipv4.sysctl_fib_multipath_hash_policy) { + switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { case 0: mlxsw_sp_mp4_hash_outer_addr(config); break; @@ -10342,7 +10342,7 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_mp_hash_inner_l3(config); break; case 3: - hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); /* Outer */ MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP); MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP); @@ -10523,13 +10523,14 @@ static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { struct net *net = mlxsw_sp_net(mlxsw_sp); - bool usp = net->ipv4.sysctl_ip_fwd_update_priority; char rgcr_pl[MLXSW_REG_RGCR_LEN]; u64 max_rifs; + bool usp; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) return -EIO; max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority); mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c index 005e56ea5da1204c98e12778aa791b39b56b9dc9..5893770bfd94627139cdca7d61b88a224040ad48 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c @@ -75,6 +75,9 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid, unsigned int vid, enum macaccess_entry_type type) { + int ret; + + spin_lock(&lan966x->mac_lock); lan966x_mac_select(lan966x, mac, vid); /* Issue a write command */ @@ -86,7 +89,10 @@ static int __lan966x_mac_learn(struct lan966x *lan966x, int pgid, ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_LEARN), lan966x, ANA_MACACCESS); - return lan966x_mac_wait_for_completion(lan966x); + ret = lan966x_mac_wait_for_completion(lan966x); + spin_unlock(&lan966x->mac_lock); + + return ret; } /* The mask of the front ports is encoded inside the mac parameter via a call @@ -113,11 +119,13 @@ int lan966x_mac_learn(struct lan966x *lan966x, int port, return __lan966x_mac_learn(lan966x, port, false, mac, vid, type); } -int lan966x_mac_forget(struct lan966x *lan966x, - const unsigned char mac[ETH_ALEN], - unsigned int vid, - enum macaccess_entry_type type) +static int lan966x_mac_forget_locked(struct lan966x *lan966x, + const unsigned char mac[ETH_ALEN], + unsigned int vid, + enum macaccess_entry_type type) { + lockdep_assert_held(&lan966x->mac_lock); + lan966x_mac_select(lan966x, mac, vid); /* Issue a forget command */ @@ -128,6 +136,20 @@ int lan966x_mac_forget(struct lan966x *lan966x, return lan966x_mac_wait_for_completion(lan966x); } +int lan966x_mac_forget(struct lan966x *lan966x, + const unsigned char mac[ETH_ALEN], + unsigned int vid, + enum macaccess_entry_type type) +{ + int ret; + + spin_lock(&lan966x->mac_lock); + ret = lan966x_mac_forget_locked(lan966x, mac, vid, type); + spin_unlock(&lan966x->mac_lock); + + return ret; +} + int lan966x_mac_cpu_learn(struct lan966x *lan966x, const char *addr, u16 vid) { return lan966x_mac_learn(lan966x, PGID_CPU, addr, vid, ENTRYTYPE_LOCKED); @@ -161,7 +183,7 @@ static struct lan966x_mac_entry *lan966x_mac_alloc_entry(const unsigned char *ma { struct lan966x_mac_entry *mac_entry; - mac_entry = kzalloc(sizeof(*mac_entry), GFP_KERNEL); + mac_entry = kzalloc(sizeof(*mac_entry), GFP_ATOMIC); if (!mac_entry) return NULL; @@ -179,7 +201,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x, struct lan966x_mac_entry *res = NULL; struct lan966x_mac_entry *mac_entry; - spin_lock(&lan966x->mac_lock); list_for_each_entry(mac_entry, &lan966x->mac_entries, list) { if (mac_entry->vid == vid && ether_addr_equal(mac, mac_entry->mac) && @@ -188,7 +209,6 @@ static struct lan966x_mac_entry *lan966x_mac_find_entry(struct lan966x *lan966x, break; } } - spin_unlock(&lan966x->mac_lock); return res; } @@ -231,8 +251,11 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port, { struct lan966x_mac_entry *mac_entry; - if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL)) + spin_lock(&lan966x->mac_lock); + if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL)) { + spin_unlock(&lan966x->mac_lock); return 0; + } /* In case the entry already exists, don't add it again to SW, * just update HW, but we need to look in the actual HW because @@ -241,21 +264,25 @@ int lan966x_mac_add_entry(struct lan966x *lan966x, struct lan966x_port *port, * add the entry but without the extern_learn flag. */ mac_entry = lan966x_mac_find_entry(lan966x, addr, vid, port->chip_port); - if (mac_entry) - return lan966x_mac_learn(lan966x, port->chip_port, - addr, vid, ENTRYTYPE_LOCKED); + if (mac_entry) { + spin_unlock(&lan966x->mac_lock); + goto mac_learn; + } mac_entry = lan966x_mac_alloc_entry(addr, vid, port->chip_port); - if (!mac_entry) + if (!mac_entry) { + spin_unlock(&lan966x->mac_lock); return -ENOMEM; + } - spin_lock(&lan966x->mac_lock); list_add_tail(&mac_entry->list, &lan966x->mac_entries); spin_unlock(&lan966x->mac_lock); - lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED); lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid, port->dev); +mac_learn: + lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED); + return 0; } @@ -269,8 +296,9 @@ int lan966x_mac_del_entry(struct lan966x *lan966x, const unsigned char *addr, list) { if (mac_entry->vid == vid && ether_addr_equal(addr, mac_entry->mac)) { - lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid, - ENTRYTYPE_LOCKED); + lan966x_mac_forget_locked(lan966x, mac_entry->mac, + mac_entry->vid, + ENTRYTYPE_LOCKED); list_del(&mac_entry->list); kfree(mac_entry); @@ -288,8 +316,8 @@ void lan966x_mac_purge_entries(struct lan966x *lan966x) spin_lock(&lan966x->mac_lock); list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, list) { - lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid, - ENTRYTYPE_LOCKED); + lan966x_mac_forget_locked(lan966x, mac_entry->mac, + mac_entry->vid, ENTRYTYPE_LOCKED); list_del(&mac_entry->list); kfree(mac_entry); @@ -325,10 +353,13 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, { struct lan966x_mac_entry *mac_entry, *tmp; unsigned char mac[ETH_ALEN] __aligned(2); + struct list_head mac_deleted_entries; u32 dest_idx; u32 column; u16 vid; + INIT_LIST_HEAD(&mac_deleted_entries); + spin_lock(&lan966x->mac_lock); list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, list) { bool found = false; @@ -362,20 +393,26 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, } if (!found) { - /* Notify the bridge that the entry doesn't exist - * anymore in the HW and remove the entry from the SW - * list - */ - lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, - mac_entry->mac, mac_entry->vid, - lan966x->ports[mac_entry->port_index]->dev); - list_del(&mac_entry->list); - kfree(mac_entry); + /* Move the entry from SW list to a tmp list such that + * it would be deleted later + */ + list_add_tail(&mac_entry->list, &mac_deleted_entries); } } spin_unlock(&lan966x->mac_lock); + list_for_each_entry_safe(mac_entry, tmp, &mac_deleted_entries, list) { + /* Notify the bridge that the entry doesn't exist + * anymore in the HW + */ + lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, + mac_entry->mac, mac_entry->vid, + lan966x->ports[mac_entry->port_index]->dev); + list_del(&mac_entry->list); + kfree(mac_entry); + } + /* Now go to the list of columns and see if any entry was not in the SW * list, then that means that the entry is new so it needs to notify the * bridge. @@ -396,13 +433,20 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, if (WARN_ON(dest_idx >= lan966x->num_phys_ports)) continue; + spin_lock(&lan966x->mac_lock); + mac_entry = lan966x_mac_find_entry(lan966x, mac, vid, dest_idx); + if (mac_entry) { + spin_unlock(&lan966x->mac_lock); + continue; + } + mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx); - if (!mac_entry) + if (!mac_entry) { + spin_unlock(&lan966x->mac_lock); return; + } mac_entry->row = row; - - spin_lock(&lan966x->mac_lock); list_add_tail(&mac_entry->list, &lan966x->mac_entries); spin_unlock(&lan966x->mac_lock); @@ -424,6 +468,7 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x) lan966x, ANA_MACTINDX); while (1) { + spin_lock(&lan966x->mac_lock); lan_rmw(ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_SYNC_GET_NEXT), ANA_MACACCESS_MAC_TABLE_CMD, lan966x, ANA_MACACCESS); @@ -447,12 +492,15 @@ irqreturn_t lan966x_mac_irq_handler(struct lan966x *lan966x) stop = false; if (column == LAN966X_MAC_COLUMNS - 1 && - index == 0 && stop) + index == 0 && stop) { + spin_unlock(&lan966x->mac_lock); break; + } entry[column].mach = lan_rd(lan966x, ANA_MACHDATA); entry[column].macl = lan_rd(lan966x, ANA_MACLDATA); entry[column].maca = lan_rd(lan966x, ANA_MACACCESS); + spin_unlock(&lan966x->mac_lock); /* Once all the columns are read process them */ if (column == LAN966X_MAC_COLUMNS - 1) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 0147de40536533bea52ee9f140a7933ac9b77c65..ffb6f6d05a07b764114db98e0fbe4497e2dfe80b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -474,7 +474,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, set_tun->ttl = ip4_dst_hoplimit(&rt->dst); ip_rt_put(rt); } else { - set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); } } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 38fe77d1035e306c6d3fd13c3fb547d8bfd2cbec..3fe720c5dc9fcd11654fa4a51e291d0d2fe94707 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -298,6 +298,11 @@ static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr, *art_time = ns; } +static int stmmac_cross_ts_isr(struct stmmac_priv *priv) +{ + return (readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE); +} + static int intel_crosststamp(ktime_t *device, struct system_counterval_t *system, void *ctx) @@ -313,8 +318,6 @@ static int intel_crosststamp(ktime_t *device, u32 num_snapshot; u32 gpio_value; u32 acr_value; - int ret; - u32 v; int i; if (!boot_cpu_has(X86_FEATURE_ART)) @@ -328,6 +331,8 @@ static int intel_crosststamp(ktime_t *device, if (priv->plat->ext_snapshot_en) return -EBUSY; + priv->plat->int_snapshot_en = 1; + mutex_lock(&priv->aux_ts_lock); /* Enable Internal snapshot trigger */ acr_value = readl(ptpaddr + PTP_ACR); @@ -347,6 +352,7 @@ static int intel_crosststamp(ktime_t *device, break; default: mutex_unlock(&priv->aux_ts_lock); + priv->plat->int_snapshot_en = 0; return -EINVAL; } writel(acr_value, ptpaddr + PTP_ACR); @@ -368,13 +374,12 @@ static int intel_crosststamp(ktime_t *device, gpio_value |= GMAC_GPO1; writel(gpio_value, ioaddr + GMAC_GPIO_STATUS); - /* Poll for time sync operation done */ - ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v, - (v & GMAC_INT_TSIE), 100, 10000); - - if (ret == -ETIMEDOUT) { - pr_err("%s: Wait for time sync operation timeout\n", __func__); - return ret; + /* Time sync done Indication - Interrupt method */ + if (!wait_event_interruptible_timeout(priv->tstamp_busy_wait, + stmmac_cross_ts_isr(priv), + HZ / 100)) { + priv->plat->int_snapshot_en = 0; + return -ETIMEDOUT; } num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) & @@ -392,6 +397,7 @@ static int intel_crosststamp(ktime_t *device, } system->cycles *= intel_priv->crossts_adj; + priv->plat->int_snapshot_en = 0; return 0; } @@ -576,6 +582,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->has_crossts = true; plat->crosststamp = intel_crosststamp; + plat->int_snapshot_en = 0; /* Setup MSI vector offset specific to Intel mGbE controller */ plat->msi_mac_vec = 29; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 6ff88df5876733dd6dddd746547af0d1c939b78b..ca8ab290013ce3fd79b9251fedc8f9a59b8d8381 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -576,32 +576,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) } } - ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks); - if (ret) { - dev_err(plat->dev, "failed to enable clks, err = %d\n", ret); - return ret; - } - - ret = clk_prepare_enable(plat->rmii_internal_clk); - if (ret) { - dev_err(plat->dev, "failed to enable rmii internal clk, err = %d\n", ret); - goto err_clk; - } - return 0; - -err_clk: - clk_bulk_disable_unprepare(variant->num_clks, plat->clks); - return ret; -} - -static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv) -{ - struct mediatek_dwmac_plat_data *plat = priv; - const struct mediatek_dwmac_variant *variant = plat->variant; - - clk_disable_unprepare(plat->rmii_internal_clk); - clk_bulk_disable_unprepare(variant->num_clks, plat->clks); } static int mediatek_dwmac_clks_config(void *priv, bool enabled) @@ -643,7 +618,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->addr64 = priv_plat->variant->dma_bit_mask; plat->bsp_priv = priv_plat; plat->init = mediatek_dwmac_init; - plat->exit = mediatek_dwmac_exit; plat->clks_config = mediatek_dwmac_clks_config; if (priv_plat->variant->dwmac_fix_mac_speed) plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed; @@ -712,13 +686,32 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) mediatek_dwmac_common_data(pdev, plat_dat, priv_plat); mediatek_dwmac_init(pdev, priv_plat); + ret = mediatek_dwmac_clks_config(priv_plat, true); + if (ret) + return ret; + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) { stmmac_remove_config_dt(pdev, plat_dat); - return ret; + goto err_drv_probe; } return 0; + +err_drv_probe: + mediatek_dwmac_clks_config(priv_plat, false); + return ret; +} + +static int mediatek_dwmac_remove(struct platform_device *pdev) +{ + struct mediatek_dwmac_plat_data *priv_plat = get_stmmac_bsp_priv(&pdev->dev); + int ret; + + ret = stmmac_pltfr_remove(pdev); + mediatek_dwmac_clks_config(priv_plat, false); + + return ret; } static const struct of_device_id mediatek_dwmac_match[] = { @@ -733,7 +726,7 @@ MODULE_DEVICE_TABLE(of, mediatek_dwmac_match); static struct platform_driver mediatek_dwmac_driver = { .probe = mediatek_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove = mediatek_dwmac_remove, .driver = { .name = "dwmac-mediatek", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 462ca7ed095a2a8e8d4b5462191d02c756797098..71dad409f78b075eeaed6d2bacee3b47092aca84 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -150,7 +150,8 @@ #define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \ GMAC_INT_PCS_ANE) -#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN) +#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \ + GMAC_INT_TSIE) enum dwmac4_irq_status { time_stamp_irq = 0x00001000, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index fd41db65fe1df41fe53d24f19d6a39f04173bf10..d8f1fbc25bdd3e7a7e66e9f7d983137492997e1a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -23,6 +23,7 @@ static void dwmac4_core_init(struct mac_device_info *hw, struct net_device *dev) { + struct stmmac_priv *priv = netdev_priv(dev); void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + GMAC_CONFIG); @@ -58,6 +59,9 @@ static void dwmac4_core_init(struct mac_device_info *hw, value |= GMAC_INT_FPE_EN; writel(value, ioaddr + GMAC_INT_EN); + + if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE) + init_waitqueue_head(&priv->tstamp_busy_wait); } static void dwmac4_rx_queue_enable(struct mac_device_info *hw, @@ -219,6 +223,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan) if (queue == 0 || queue == 4) { value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK; value |= MTL_RXQ_DMA_Q04MDMACH(chan); + } else if (queue > 4) { + value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4); + value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4); } else { value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue); value |= MTL_RXQ_DMA_QXMDMACH(chan, queue); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 57970ae2178dab8a559a66e658443af91bb997bd..f9e83964aa7ea63beac580bf6fe906e9b252884a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -266,6 +266,7 @@ struct stmmac_priv { rwlock_t ptp_lock; /* Protects auxiliary snapshot registers from concurrent access. */ struct mutex aux_ts_lock; + wait_queue_head_t tstamp_busy_wait; void __iomem *mmcaddr; void __iomem *ptpaddr; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index abfb3cd5958dfe353c184a98c07532c9ddb2753d..9c3055ee26085f645494fc708ef1f19739a892fa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -803,14 +803,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, netdev_warn(priv->dev, "Setting EEE tx-lpi is not supported\n"); - if (priv->hw->xpcs) { - ret = xpcs_config_eee(priv->hw->xpcs, - priv->plat->mult_fact_100ns, - edata->eee_enabled); - if (ret) - return ret; - } - if (!edata->eee_enabled) stmmac_disable_eee_mode(priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 92d32940aff00660663e709fd8b7196500e96e16..764832f4dae1a7b241e07d8f30e725eb6c83a870 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -179,6 +179,11 @@ static void timestamp_interrupt(struct stmmac_priv *priv) u64 ptp_time; int i; + if (priv->plat->int_snapshot_en) { + wake_up(&priv->tstamp_busy_wait); + return; + } + tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE; if (!tsync_int) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d1a7cf4567bc26bb34a605404b2303e1f87b9bb7..c5f33630e7718384fc0a0a63b36e5ca7dea5f549 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -834,19 +834,10 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) struct timespec64 now; u32 sec_inc = 0; u64 temp = 0; - int ret; if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) return -EOPNOTSUPP; - ret = clk_prepare_enable(priv->plat->clk_ptp_ref); - if (ret < 0) { - netdev_warn(priv->dev, - "failed to enable PTP reference clock: %pe\n", - ERR_PTR(ret)); - return ret; - } - stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags); priv->systime_flags = systime_flags; @@ -3270,6 +3261,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) stmmac_mmc_setup(priv); + if (ptp_register) { + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + } + ret = stmmac_init_ptp(priv); if (ret == -EOPNOTSUPP) netdev_info(priv->dev, "PTP not supported by HW\n"); @@ -7213,8 +7212,6 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); pm_runtime_get_sync(dev); - pm_runtime_disable(dev); - pm_runtime_put_noidle(dev); stmmac_stop_all_dma(priv); stmmac_mac_set(priv, priv->ioaddr, false); @@ -7241,6 +7238,9 @@ int stmmac_dvr_remove(struct device *dev) mutex_destroy(&priv->lock); bitmap_free(priv->af_xdp_zc_qps); + pm_runtime_disable(dev); + pm_runtime_put_noidle(dev); + return 0; } EXPORT_SYMBOL_GPL(stmmac_dvr_remove); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 11e1055e8260f49962ca88271337eb022b3fa3c5..9f5cac4000da680d061c37171817fb8642abee55 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -815,7 +815,13 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) if (ret) return ret; - stmmac_init_tstamp_counter(priv, priv->systime_flags); + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) { + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + return ret; + } } return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index e45fb191d8e6ef86317b40f4743172204a191773..4d11980dcd64dc063b5cf3fda361f84560f01ef7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -175,11 +175,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, struct stmmac_priv *priv = container_of(ptp, struct stmmac_priv, ptp_clock_ops); void __iomem *ptpaddr = priv->ptpaddr; - void __iomem *ioaddr = priv->hw->pcsr; struct stmmac_pps_cfg *cfg; - u32 intr_value, acr_value; int ret = -EOPNOTSUPP; unsigned long flags; + u32 acr_value; switch (rq->type) { case PTP_CLK_REQ_PEROUT: @@ -213,19 +212,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n", priv->plat->ext_snapshot_num >> PTP_ACR_ATSEN_SHIFT); - /* Enable Timestamp Interrupt */ - intr_value = readl(ioaddr + GMAC_INT_EN); - intr_value |= GMAC_INT_TSIE; - writel(intr_value, ioaddr + GMAC_INT_EN); - } else { netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n", priv->plat->ext_snapshot_num >> PTP_ACR_ATSEN_SHIFT); - /* Disable Timestamp Interrupt */ - intr_value = readl(ioaddr + GMAC_INT_EN); - intr_value &= ~GMAC_INT_TSIE; - writel(intr_value, ioaddr + GMAC_INT_EN); } writel(acr_value, ptpaddr + PTP_ACR); mutex_unlock(&priv->aux_ts_lock); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index b082819509e10dc6339d08e18c4c8044eba9bb6c..0f6efaabaa32b892ccb57355bb9db3941ec216ec 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "12" /* Information for net */ -#define NET_VERSION "12" +#define NET_VERSION "13" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -5917,7 +5917,8 @@ static void r8153_enter_oob(struct r8152 *tp) wait_oob_link_list_ready(tp); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu)); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522); + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT); switch (tp->version) { case RTL_VER_03: @@ -5953,6 +5954,10 @@ static void r8153_enter_oob(struct r8152 *tp) ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); + ocp_data |= MCU_BORW_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + rxdy_gated_en(tp, false); ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); @@ -6555,6 +6560,9 @@ static void rtl8156_down(struct r8152 *tp) rtl_disable(tp); rtl_reset_bmu(tp); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522); + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT); + /* Clear teredo wake event. bit[15:8] is the teredo wakeup * type. Set it to zero. bits[7:0] are the W1C bits about * the events. Set them to all 1 to clear them. @@ -6565,6 +6573,10 @@ static void rtl8156_down(struct r8152 *tp) ocp_data |= NOW_IS_OOB; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7); + ocp_data |= MCU_BORW_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data); + rtl_rx_vlan_en(tp, true); rxdy_gated_en(tp, false); diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index f52960d2dfbe8e060468ea79bd8b709b2570991f..bff144c97e66ec9556a2565745560bbdb09853b4 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -32,7 +32,7 @@ config DEBUG_PINCTRL Say Y here to add some extra checks and diagnostics to PINCTRL calls. config PINCTRL_AMD - tristate "AMD GPIO pin control" + bool "AMD GPIO pin control" depends on HAS_IOMEM depends on ACPI || COMPILE_TEST select GPIOLIB diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index a140b6bfbfaa675c7eee4ac0b775f2bb862be9f6..bcde042d29dc3b6811a41b8d6b7c8676a4199cf1 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -102,7 +102,7 @@ struct armada_37xx_pinctrl { struct device *dev; struct gpio_chip gpio_chip; struct irq_chip irq_chip; - spinlock_t irq_lock; + raw_spinlock_t irq_lock; struct pinctrl_desc pctl; struct pinctrl_dev *pctl_dev; struct armada_37xx_pin_group *groups; @@ -523,9 +523,9 @@ static void armada_37xx_irq_ack(struct irq_data *d) unsigned long flags; armada_37xx_irq_update_reg(®, d); - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); writel(d->mask, info->base + reg); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); } static void armada_37xx_irq_mask(struct irq_data *d) @@ -536,10 +536,10 @@ static void armada_37xx_irq_mask(struct irq_data *d) unsigned long flags; armada_37xx_irq_update_reg(®, d); - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); val = readl(info->base + reg); writel(val & ~d->mask, info->base + reg); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); } static void armada_37xx_irq_unmask(struct irq_data *d) @@ -550,10 +550,10 @@ static void armada_37xx_irq_unmask(struct irq_data *d) unsigned long flags; armada_37xx_irq_update_reg(®, d); - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); val = readl(info->base + reg); writel(val | d->mask, info->base + reg); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); } static int armada_37xx_irq_set_wake(struct irq_data *d, unsigned int on) @@ -564,14 +564,14 @@ static int armada_37xx_irq_set_wake(struct irq_data *d, unsigned int on) unsigned long flags; armada_37xx_irq_update_reg(®, d); - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); val = readl(info->base + reg); if (on) val |= (BIT(d->hwirq % GPIO_PER_REG)); else val &= ~(BIT(d->hwirq % GPIO_PER_REG)); writel(val, info->base + reg); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); return 0; } @@ -583,7 +583,7 @@ static int armada_37xx_irq_set_type(struct irq_data *d, unsigned int type) u32 val, reg = IRQ_POL; unsigned long flags; - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); armada_37xx_irq_update_reg(®, d); val = readl(info->base + reg); switch (type) { @@ -607,11 +607,11 @@ static int armada_37xx_irq_set_type(struct irq_data *d, unsigned int type) break; } default: - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); return -EINVAL; } writel(val, info->base + reg); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); return 0; } @@ -626,7 +626,7 @@ static int armada_37xx_edge_both_irq_swap_pol(struct armada_37xx_pinctrl *info, regmap_read(info->regmap, INPUT_VAL + 4*reg_idx, &l); - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); p = readl(info->base + IRQ_POL + 4 * reg_idx); if ((p ^ l) & (1 << bit_num)) { /* @@ -647,7 +647,7 @@ static int armada_37xx_edge_both_irq_swap_pol(struct armada_37xx_pinctrl *info, ret = -1; } - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); return ret; } @@ -664,11 +664,11 @@ static void armada_37xx_irq_handler(struct irq_desc *desc) u32 status; unsigned long flags; - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); status = readl_relaxed(info->base + IRQ_STATUS + 4 * i); /* Manage only the interrupt that was enabled */ status &= readl_relaxed(info->base + IRQ_EN + 4 * i); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); while (status) { u32 hwirq = ffs(status) - 1; u32 virq = irq_find_mapping(d, hwirq + @@ -695,12 +695,12 @@ static void armada_37xx_irq_handler(struct irq_desc *desc) update_status: /* Update status in case a new IRQ appears */ - spin_lock_irqsave(&info->irq_lock, flags); + raw_spin_lock_irqsave(&info->irq_lock, flags); status = readl_relaxed(info->base + IRQ_STATUS + 4 * i); /* Manage only the interrupt that was enabled */ status &= readl_relaxed(info->base + IRQ_EN + 4 * i); - spin_unlock_irqrestore(&info->irq_lock, flags); + raw_spin_unlock_irqrestore(&info->irq_lock, flags); } } chained_irq_exit(chip, desc); @@ -731,7 +731,7 @@ static int armada_37xx_irqchip_register(struct platform_device *pdev, struct device *dev = &pdev->dev; unsigned int i, nr_irq_parent; - spin_lock_init(&info->irq_lock); + raw_spin_lock_init(&info->irq_lock); nr_irq_parent = of_irq_count(np); if (!nr_irq_parent) { @@ -1107,25 +1107,40 @@ static const struct of_device_id armada_37xx_pinctrl_of_match[] = { { }, }; +static const struct regmap_config armada_37xx_pinctrl_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .use_raw_spinlock = true, +}; + static int __init armada_37xx_pinctrl_probe(struct platform_device *pdev) { struct armada_37xx_pinctrl *info; struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; struct regmap *regmap; + void __iomem *base; int ret; + base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); + if (IS_ERR(base)) { + dev_err(dev, "failed to ioremap base address: %pe\n", base); + return PTR_ERR(base); + } + + regmap = devm_regmap_init_mmio(dev, base, + &armada_37xx_pinctrl_regmap_config); + if (IS_ERR(regmap)) { + dev_err(dev, "failed to create regmap: %pe\n", regmap); + return PTR_ERR(regmap); + } + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; info->dev = dev; - - regmap = syscon_node_to_regmap(np); - if (IS_ERR(regmap)) - return dev_err_probe(dev, PTR_ERR(regmap), "cannot get regmap\n"); info->regmap = regmap; - info->data = of_device_get_match_data(dev); ret = armada_37xx_pinctrl_register(pdev, info); diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index 5f4a8c5c66508ac6bf96789403e7dd1f061a2cf3..dfc8ea9f3843cfcf57d45fa920871b15a7341b9b 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -29,19 +29,12 @@ #define ocelot_clrsetbits(addr, clear, set) \ writel((readl(addr) & ~(clear)) | (set), (addr)) -/* PINCONFIG bits (sparx5 only) */ enum { PINCONF_BIAS, PINCONF_SCHMITT, PINCONF_DRIVE_STRENGTH, }; -#define BIAS_PD_BIT BIT(4) -#define BIAS_PU_BIT BIT(3) -#define BIAS_BITS (BIAS_PD_BIT|BIAS_PU_BIT) -#define SCHMITT_BIT BIT(2) -#define DRIVE_BITS GENMASK(1, 0) - /* GPIO standard registers */ #define OCELOT_GPIO_OUT_SET 0x0 #define OCELOT_GPIO_OUT_CLR 0x4 @@ -321,6 +314,13 @@ struct ocelot_pin_caps { unsigned char a_functions[OCELOT_FUNC_PER_PIN]; /* Additional functions */ }; +struct ocelot_pincfg_data { + u8 pd_bit; + u8 pu_bit; + u8 drive_bits; + u8 schmitt_bit; +}; + struct ocelot_pinctrl { struct device *dev; struct pinctrl_dev *pctl; @@ -328,10 +328,16 @@ struct ocelot_pinctrl { struct regmap *map; struct regmap *pincfg; struct pinctrl_desc *desc; + const struct ocelot_pincfg_data *pincfg_data; struct ocelot_pmx_func func[FUNC_MAX]; u8 stride; }; +struct ocelot_match_data { + struct pinctrl_desc desc; + struct ocelot_pincfg_data pincfg_data; +}; + #define LUTON_P(p, f0, f1) \ static struct ocelot_pin_caps luton_pin_##p = { \ .pin = p, \ @@ -1325,24 +1331,27 @@ static int ocelot_hw_get_value(struct ocelot_pinctrl *info, int ret = -EOPNOTSUPP; if (info->pincfg) { + const struct ocelot_pincfg_data *opd = info->pincfg_data; u32 regcfg; - ret = regmap_read(info->pincfg, pin, ®cfg); + ret = regmap_read(info->pincfg, + pin * regmap_get_reg_stride(info->pincfg), + ®cfg); if (ret) return ret; ret = 0; switch (reg) { case PINCONF_BIAS: - *val = regcfg & BIAS_BITS; + *val = regcfg & (opd->pd_bit | opd->pu_bit); break; case PINCONF_SCHMITT: - *val = regcfg & SCHMITT_BIT; + *val = regcfg & opd->schmitt_bit; break; case PINCONF_DRIVE_STRENGTH: - *val = regcfg & DRIVE_BITS; + *val = regcfg & opd->drive_bits; break; default: @@ -1359,14 +1368,18 @@ static int ocelot_pincfg_clrsetbits(struct ocelot_pinctrl *info, u32 regaddr, u32 val; int ret; - ret = regmap_read(info->pincfg, regaddr, &val); + ret = regmap_read(info->pincfg, + regaddr * regmap_get_reg_stride(info->pincfg), + &val); if (ret) return ret; val &= ~clrbits; val |= setbits; - ret = regmap_write(info->pincfg, regaddr, val); + ret = regmap_write(info->pincfg, + regaddr * regmap_get_reg_stride(info->pincfg), + val); return ret; } @@ -1379,23 +1392,27 @@ static int ocelot_hw_set_value(struct ocelot_pinctrl *info, int ret = -EOPNOTSUPP; if (info->pincfg) { + const struct ocelot_pincfg_data *opd = info->pincfg_data; ret = 0; switch (reg) { case PINCONF_BIAS: - ret = ocelot_pincfg_clrsetbits(info, pin, BIAS_BITS, + ret = ocelot_pincfg_clrsetbits(info, pin, + opd->pd_bit | opd->pu_bit, val); break; case PINCONF_SCHMITT: - ret = ocelot_pincfg_clrsetbits(info, pin, SCHMITT_BIT, + ret = ocelot_pincfg_clrsetbits(info, pin, + opd->schmitt_bit, val); break; case PINCONF_DRIVE_STRENGTH: if (val <= 3) ret = ocelot_pincfg_clrsetbits(info, pin, - DRIVE_BITS, val); + opd->drive_bits, + val); else ret = -EINVAL; break; @@ -1425,17 +1442,20 @@ static int ocelot_pinconf_get(struct pinctrl_dev *pctldev, if (param == PIN_CONFIG_BIAS_DISABLE) val = (val == 0); else if (param == PIN_CONFIG_BIAS_PULL_DOWN) - val = (val & BIAS_PD_BIT ? true : false); + val = !!(val & info->pincfg_data->pd_bit); else /* PIN_CONFIG_BIAS_PULL_UP */ - val = (val & BIAS_PU_BIT ? true : false); + val = !!(val & info->pincfg_data->pu_bit); break; case PIN_CONFIG_INPUT_SCHMITT_ENABLE: + if (!info->pincfg_data->schmitt_bit) + return -EOPNOTSUPP; + err = ocelot_hw_get_value(info, pin, PINCONF_SCHMITT, &val); if (err) return err; - val = (val & SCHMITT_BIT ? true : false); + val = !!(val & info->pincfg_data->schmitt_bit); break; case PIN_CONFIG_DRIVE_STRENGTH: @@ -1479,6 +1499,7 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, unsigned long *configs, unsigned int num_configs) { struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); + const struct ocelot_pincfg_data *opd = info->pincfg_data; u32 param, arg, p; int cfg, err = 0; @@ -1491,8 +1512,8 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, case PIN_CONFIG_BIAS_PULL_UP: case PIN_CONFIG_BIAS_PULL_DOWN: arg = (param == PIN_CONFIG_BIAS_DISABLE) ? 0 : - (param == PIN_CONFIG_BIAS_PULL_UP) ? BIAS_PU_BIT : - BIAS_PD_BIT; + (param == PIN_CONFIG_BIAS_PULL_UP) ? + opd->pu_bit : opd->pd_bit; err = ocelot_hw_set_value(info, pin, PINCONF_BIAS, arg); if (err) @@ -1501,7 +1522,10 @@ static int ocelot_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, break; case PIN_CONFIG_INPUT_SCHMITT_ENABLE: - arg = arg ? SCHMITT_BIT : 0; + if (!opd->schmitt_bit) + return -EOPNOTSUPP; + + arg = arg ? opd->schmitt_bit : 0; err = ocelot_hw_set_value(info, pin, PINCONF_SCHMITT, arg); if (err) @@ -1562,69 +1586,94 @@ static const struct pinctrl_ops ocelot_pctl_ops = { .dt_free_map = pinconf_generic_dt_free_map, }; -static struct pinctrl_desc luton_desc = { - .name = "luton-pinctrl", - .pins = luton_pins, - .npins = ARRAY_SIZE(luton_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .owner = THIS_MODULE, +static struct ocelot_match_data luton_desc = { + .desc = { + .name = "luton-pinctrl", + .pins = luton_pins, + .npins = ARRAY_SIZE(luton_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .owner = THIS_MODULE, + }, }; -static struct pinctrl_desc serval_desc = { - .name = "serval-pinctrl", - .pins = serval_pins, - .npins = ARRAY_SIZE(serval_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .owner = THIS_MODULE, +static struct ocelot_match_data serval_desc = { + .desc = { + .name = "serval-pinctrl", + .pins = serval_pins, + .npins = ARRAY_SIZE(serval_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .owner = THIS_MODULE, + }, }; -static struct pinctrl_desc ocelot_desc = { - .name = "ocelot-pinctrl", - .pins = ocelot_pins, - .npins = ARRAY_SIZE(ocelot_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .owner = THIS_MODULE, +static struct ocelot_match_data ocelot_desc = { + .desc = { + .name = "ocelot-pinctrl", + .pins = ocelot_pins, + .npins = ARRAY_SIZE(ocelot_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .owner = THIS_MODULE, + }, }; -static struct pinctrl_desc jaguar2_desc = { - .name = "jaguar2-pinctrl", - .pins = jaguar2_pins, - .npins = ARRAY_SIZE(jaguar2_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .owner = THIS_MODULE, +static struct ocelot_match_data jaguar2_desc = { + .desc = { + .name = "jaguar2-pinctrl", + .pins = jaguar2_pins, + .npins = ARRAY_SIZE(jaguar2_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .owner = THIS_MODULE, + }, }; -static struct pinctrl_desc servalt_desc = { - .name = "servalt-pinctrl", - .pins = servalt_pins, - .npins = ARRAY_SIZE(servalt_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .owner = THIS_MODULE, +static struct ocelot_match_data servalt_desc = { + .desc = { + .name = "servalt-pinctrl", + .pins = servalt_pins, + .npins = ARRAY_SIZE(servalt_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .owner = THIS_MODULE, + }, }; -static struct pinctrl_desc sparx5_desc = { - .name = "sparx5-pinctrl", - .pins = sparx5_pins, - .npins = ARRAY_SIZE(sparx5_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &ocelot_pmx_ops, - .confops = &ocelot_confops, - .owner = THIS_MODULE, +static struct ocelot_match_data sparx5_desc = { + .desc = { + .name = "sparx5-pinctrl", + .pins = sparx5_pins, + .npins = ARRAY_SIZE(sparx5_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &ocelot_pmx_ops, + .confops = &ocelot_confops, + .owner = THIS_MODULE, + }, + .pincfg_data = { + .pd_bit = BIT(4), + .pu_bit = BIT(3), + .drive_bits = GENMASK(1, 0), + .schmitt_bit = BIT(2), + }, }; -static struct pinctrl_desc lan966x_desc = { - .name = "lan966x-pinctrl", - .pins = lan966x_pins, - .npins = ARRAY_SIZE(lan966x_pins), - .pctlops = &ocelot_pctl_ops, - .pmxops = &lan966x_pmx_ops, - .confops = &ocelot_confops, - .owner = THIS_MODULE, +static struct ocelot_match_data lan966x_desc = { + .desc = { + .name = "lan966x-pinctrl", + .pins = lan966x_pins, + .npins = ARRAY_SIZE(lan966x_pins), + .pctlops = &ocelot_pctl_ops, + .pmxops = &lan966x_pmx_ops, + .confops = &ocelot_confops, + .owner = THIS_MODULE, + }, + .pincfg_data = { + .pd_bit = BIT(3), + .pu_bit = BIT(2), + .drive_bits = GENMASK(1, 0), + }, }; static int ocelot_create_group_func_map(struct device *dev, @@ -1890,7 +1939,8 @@ static const struct of_device_id ocelot_pinctrl_of_match[] = { {}, }; -static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev) +static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev, + const struct ocelot_pinctrl *info) { void __iomem *base; @@ -1898,7 +1948,7 @@ static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev) .reg_bits = 32, .val_bits = 32, .reg_stride = 4, - .max_register = 32, + .max_register = info->desc->npins * 4, .name = "pincfg", }; @@ -1913,6 +1963,7 @@ static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev) static int ocelot_pinctrl_probe(struct platform_device *pdev) { + const struct ocelot_match_data *data; struct device *dev = &pdev->dev; struct ocelot_pinctrl *info; struct reset_control *reset; @@ -1929,7 +1980,16 @@ static int ocelot_pinctrl_probe(struct platform_device *pdev) if (!info) return -ENOMEM; - info->desc = (struct pinctrl_desc *)device_get_match_data(dev); + data = device_get_match_data(dev); + if (!data) + return -EINVAL; + + info->desc = devm_kmemdup(dev, &data->desc, sizeof(*info->desc), + GFP_KERNEL); + if (!info->desc) + return -ENOMEM; + + info->pincfg_data = &data->pincfg_data; reset = devm_reset_control_get_optional_shared(dev, "switch"); if (IS_ERR(reset)) @@ -1956,7 +2016,7 @@ static int ocelot_pinctrl_probe(struct platform_device *pdev) /* Pinconf registers */ if (info->desc->confops) { - pincfg = ocelot_pinctrl_create_pincfg(pdev); + pincfg = ocelot_pinctrl_create_pincfg(pdev, info); if (IS_ERR(pincfg)) dev_dbg(dev, "Failed to create pincfg regmap\n"); else diff --git a/drivers/pinctrl/ralink/pinctrl-ralink.c b/drivers/pinctrl/ralink/pinctrl-ralink.c index 63429a2874343a984966925261a4608f00a9f6d0..770862f45b3fe1c53fcf047bc92da02f09a2195f 100644 --- a/drivers/pinctrl/ralink/pinctrl-ralink.c +++ b/drivers/pinctrl/ralink/pinctrl-ralink.c @@ -266,6 +266,8 @@ static int ralink_pinctrl_pins(struct ralink_priv *p) p->func[i]->pin_count, sizeof(int), GFP_KERNEL); + if (!p->func[i]->pins) + return -ENOMEM; for (j = 0; j < p->func[i]->pin_count; j++) p->func[i]->pins[j] = p->func[i]->pin_first + j; diff --git a/drivers/pinctrl/sunplus/sppctl.c b/drivers/pinctrl/sunplus/sppctl.c index 3ba47040ac423013f5aebddd8b047c26d4c495bc..2b3335ab56c66867b21c4c41d385acc4b001fa9d 100644 --- a/drivers/pinctrl/sunplus/sppctl.c +++ b/drivers/pinctrl/sunplus/sppctl.c @@ -871,6 +871,9 @@ static int sppctl_dt_node_to_map(struct pinctrl_dev *pctldev, struct device_node } *map = kcalloc(*num_maps + nmG, sizeof(**map), GFP_KERNEL); + if (*map == NULL) + return -ENOMEM; + for (i = 0; i < (*num_maps); i++) { dt_pin = be32_to_cpu(list[i]); pin_num = FIELD_GET(GENMASK(31, 24), dt_pin); diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 775c0bf2f923d4818fc940e9b0eb11555862d7d4..0933948d7df3d85c489395e84931f7f346dc61c8 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -1138,10 +1138,14 @@ static void bcm2835_spi_handle_err(struct spi_controller *ctlr, struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr); /* if an error occurred and we have an active dma, then terminate */ - dmaengine_terminate_sync(ctlr->dma_tx); - bs->tx_dma_active = false; - dmaengine_terminate_sync(ctlr->dma_rx); - bs->rx_dma_active = false; + if (ctlr->dma_tx) { + dmaengine_terminate_sync(ctlr->dma_tx); + bs->tx_dma_active = false; + } + if (ctlr->dma_rx) { + dmaengine_terminate_sync(ctlr->dma_rx); + bs->rx_dma_active = false; + } bcm2835_spi_undo_prologue(bs); /* and reset */ diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 31d778e9d255b9ceb8848188d3a26c6a8540384e..6a7f7df1e7764f297788c01aad46c0b8fcc0bdeb 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -69,7 +69,7 @@ #define CDNS_SPI_BAUD_DIV_SHIFT 3 /* Baud rate divisor shift in CR */ #define CDNS_SPI_SS_SHIFT 10 /* Slave Select field shift in CR */ #define CDNS_SPI_SS0 0x1 /* Slave Select zero */ -#define CDNS_SPI_NOSS 0x3C /* No Slave select */ +#define CDNS_SPI_NOSS 0xF /* No Slave select */ /* * SPI Interrupt Registers bit Masks diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 7a014eeec2d0d9a2072a4190042d14ffe14bf454..411b1307b7fd82679ecd17678bf5d7b64ba3e906 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -613,6 +613,10 @@ static int rspi_dma_transfer(struct rspi_data *rspi, struct sg_table *tx, rspi->dma_callbacked, HZ); if (ret > 0 && rspi->dma_callbacked) { ret = 0; + if (tx) + dmaengine_synchronize(rspi->ctlr->dma_tx); + if (rx) + dmaengine_synchronize(rspi->ctlr->dma_rx); } else { if (!ret) { dev_err(&rspi->ctlr->dev, "DMA timeout\n"); diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 90ce16b6e05f91e9f29173d9def568ab4075f863..f422f9c58ba790c94957f4c38da4f8599fbc1e44 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -632,16 +632,19 @@ static int __init sev_guest_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct snp_guest_dev *snp_dev; struct miscdevice *misc; + void __iomem *mapping; int ret; if (!dev->platform_data) return -ENODEV; data = (struct sev_guest_platform_data *)dev->platform_data; - layout = (__force void *)ioremap_encrypted(data->secrets_gpa, PAGE_SIZE); - if (!layout) + mapping = ioremap_encrypted(data->secrets_gpa, PAGE_SIZE); + if (!mapping) return -ENODEV; + layout = (__force void *)mapping; + ret = -ENOMEM; snp_dev = devm_kzalloc(&pdev->dev, sizeof(struct snp_guest_dev), GFP_KERNEL); if (!snp_dev) @@ -706,7 +709,7 @@ static int __init sev_guest_probe(struct platform_device *pdev) e_free_request: free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg)); e_unmap: - iounmap(layout); + iounmap(mapping); return ret; } diff --git a/fs/io_uring.c b/fs/io_uring.c index a01ea49f30173f27cf2b19128d368cb01155de2f..e8e769be9ed0581238efdbf223ff448922ba52c3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1737,6 +1737,14 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) (req->flags & REQ_F_PARTIAL_IO)) return; + /* + * READV uses fields in `struct io_rw` (len/addr) to stash the selected + * buffer data. However if that buffer is recycled the original request + * data stored in addr is lost. Therefore forbid recycling for now. + */ + if (req->opcode == IORING_OP_READV) + return; + /* * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear * the flag and hence ensure that bl->head doesn't get incremented. @@ -12931,7 +12939,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_buf_ring *br; struct io_uring_buf_reg reg; - struct io_buffer_list *bl; + struct io_buffer_list *bl, *free_bl = NULL; struct page **pages; int nr_pages; @@ -12963,7 +12971,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (bl->buf_nr_pages || !list_empty(&bl->buf_list)) return -EEXIST; } else { - bl = kzalloc(sizeof(*bl), GFP_KERNEL); + free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL); if (!bl) return -ENOMEM; } @@ -12972,7 +12980,7 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) struct_size(br, bufs, reg.ring_entries), &nr_pages); if (IS_ERR(pages)) { - kfree(bl); + kfree(free_bl); return PTR_ERR(pages); } diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 4de597a83b88dd717459b33ff4c363888f72de60..52615e6090e1c80cbc9c326a54cc9003ba9daf1f 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, a = (ATTR_RECORD*)((u8*)ctx->attr + le32_to_cpu(ctx->attr->length)); for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { - if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + - le32_to_cpu(ctx->mrec->bytes_allocated)) + u8 *mrec_end = (u8 *)ctx->mrec + + le32_to_cpu(ctx->mrec->bytes_allocated); + u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) + + a->name_length * sizeof(ntfschar); + if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end || + name_end > mrec_end) break; ctx->attr = a; if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 3375275714612663b759fd676270ab6067071b32..740b64238312733c81ec4b89cfe1da106c37bc65 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -277,7 +277,6 @@ enum ocfs2_mount_options OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ - OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */ }; #define OCFS2_OSB_SOFT_RO 0x0001 @@ -673,8 +672,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) static inline int ocfs2_mount_local(struct ocfs2_super *osb) { - return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT) - || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER)); + return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); } static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 0b0ae3ebb0cf5ea2e5ce5cfb4547e1f16238a117..da7718cef735e53e08539f152677042bbf71ad1a 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, int i, ret = -ENOSPC; if ((preferred >= 0) && (preferred < si->si_num_slots)) { - if (!si->si_slots[preferred].sl_valid || - !si->si_slots[preferred].sl_node_num) { + if (!si->si_slots[preferred].sl_valid) { ret = preferred; goto out; } } for(i = 0; i < si->si_num_slots; i++) { - if (!si->si_slots[i].sl_valid || - !si->si_slots[i].sl_node_num) { + if (!si->si_slots[i].sl_valid) { ret = i; break; } @@ -456,30 +454,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb) spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); - if (ocfs2_mount_local(osb)) - /* use slot 0 directly in local mode */ - slot = 0; - else { - /* search for ourselves first and take the slot if it already - * exists. Perhaps we need to mark this in a variable for our - * own journal recovery? Possibly not, though we certainly - * need to warn to the user */ - slot = __ocfs2_node_num_to_slot(si, osb->node_num); + /* search for ourselves first and take the slot if it already + * exists. Perhaps we need to mark this in a variable for our + * own journal recovery? Possibly not, though we certainly + * need to warn to the user */ + slot = __ocfs2_node_num_to_slot(si, osb->node_num); + if (slot < 0) { + /* if no slot yet, then just take 1st available + * one. */ + slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); if (slot < 0) { - /* if no slot yet, then just take 1st available - * one. */ - slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); - if (slot < 0) { - spin_unlock(&osb->osb_lock); - mlog(ML_ERROR, "no free slots available!\n"); - status = -EINVAL; - goto bail; - } - } else - printk(KERN_INFO "ocfs2: Slot %d on device (%s) was " - "already allocated to this node!\n", - slot, osb->dev_str); - } + spin_unlock(&osb->osb_lock); + mlog(ML_ERROR, "no free slots available!\n"); + status = -EINVAL; + goto bail; + } + } else + printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " + "allocated to this node!\n", slot, osb->dev_str); ocfs2_set_slot(si, slot, osb->node_num); osb->slot_num = slot; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f7298816d8d9ba4dbf3bc667e55653a1e50b7cdc..438be028935d2e6960729f29b20d534a87076c28 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -172,7 +172,6 @@ enum { Opt_dir_resv_level, Opt_journal_async_commit, Opt_err_cont, - Opt_nocluster, Opt_err, }; @@ -206,7 +205,6 @@ static const match_table_t tokens = { {Opt_dir_resv_level, "dir_resv_level=%u"}, {Opt_journal_async_commit, "journal_async_commit"}, {Opt_err_cont, "errors=continue"}, - {Opt_nocluster, "nocluster"}, {Opt_err, NULL} }; @@ -618,13 +616,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) goto out; } - tmp = OCFS2_MOUNT_NOCLUSTER; - if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { - ret = -EINVAL; - mlog(ML_ERROR, "Cannot change nocluster option on remount\n"); - goto out; - } - tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | OCFS2_MOUNT_HB_NONE; if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { @@ -865,7 +856,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, } if (ocfs2_userspace_stack(osb) && - !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && strncmp(osb->osb_cluster_stack, mopt->cluster_stack, OCFS2_STACK_LABEL_LEN)) { mlog(ML_ERROR, @@ -1137,11 +1127,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : "ordered"); - if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && - !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)) - printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted " - "without cluster aware mode.\n", osb->dev_str); - atomic_set(&osb->vol_state, VOLUME_MOUNTED); wake_up(&osb->osb_mount_event); @@ -1452,9 +1437,6 @@ static int ocfs2_parse_options(struct super_block *sb, case Opt_journal_async_commit: mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; break; - case Opt_nocluster: - mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER; - break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -1566,9 +1548,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) seq_printf(s, ",journal_async_commit"); - if (opts & OCFS2_MOUNT_NOCLUSTER) - seq_printf(s, ",nocluster"); - return 0; } diff --git a/fs/read_write.c b/fs/read_write.c index e0777eefd84650debb0bed304401f81d8ccb1c8e..397da0236607ee10c90b4b2a02d9183813d203f9 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1263,6 +1263,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, count, fl); file_end_write(out.file); } else { + if (out.file->f_flags & O_NONBLOCK) + fl |= SPLICE_F_NONBLOCK; + retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl); } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index e943370107d06afab21d37fc43e2aa3714fb816e..de86f5b2859f941c9d2b385197764a33356b155b 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -192,17 +192,19 @@ static inline void msg_init(struct uffd_msg *msg) } static inline struct uffd_msg userfault_msg(unsigned long address, + unsigned long real_address, unsigned int flags, unsigned long reason, unsigned int features) { struct uffd_msg msg; + msg_init(&msg); msg.event = UFFD_EVENT_PAGEFAULT; - if (!(features & UFFD_FEATURE_EXACT_ADDRESS)) - address &= PAGE_MASK; - msg.arg.pagefault.address = address; + msg.arg.pagefault.address = (features & UFFD_FEATURE_EXACT_ADDRESS) ? + real_address : address; + /* * These flags indicate why the userfault occurred: * - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault. @@ -488,8 +490,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function); uwq.wq.private = current; - uwq.msg = userfault_msg(vmf->real_address, vmf->flags, reason, - ctx->features); + uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags, + reason, ctx->features); uwq.ctx = ctx; uwq.waken = false; diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index ff3e82553a76c16baeaf25d00c849bfb5f90b2db..492dce43236eacd41b5f822a5a34ca56623be237 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -158,9 +158,24 @@ * Useful if your architecture doesn't use IPIs for remote TLB invalidates * and therefore doesn't naturally serialize with software page-table walkers. * + * MMU_GATHER_NO_FLUSH_CACHE + * + * Indicates the architecture has flush_cache_range() but it needs *NOT* be called + * before unmapping a VMA. + * + * NOTE: strictly speaking we shouldn't have this knob and instead rely on + * flush_cache_range() being a NOP, except Sparc64 seems to be + * different here. + * + * MMU_GATHER_MERGE_VMAS + * + * Indicates the architecture wants to merge ranges over VMAs; typical when + * multiple range invalidates are more expensive than a full invalidate. + * * MMU_GATHER_NO_RANGE * - * Use this if your architecture lacks an efficient flush_tlb_range(). + * Use this if your architecture lacks an efficient flush_tlb_range(). This + * option implies MMU_GATHER_MERGE_VMAS above. * * MMU_GATHER_NO_GATHER * @@ -288,6 +303,7 @@ struct mmu_gather { */ unsigned int vma_exec : 1; unsigned int vma_huge : 1; + unsigned int vma_pfn : 1; unsigned int batch_count; @@ -334,8 +350,8 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) #ifdef CONFIG_MMU_GATHER_NO_RANGE -#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma) -#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma() +#if defined(tlb_flush) +#error MMU_GATHER_NO_RANGE relies on default tlb_flush() #endif /* @@ -352,20 +368,9 @@ static inline void tlb_flush(struct mmu_gather *tlb) flush_tlb_mm(tlb->mm); } -static inline void -tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } - -#define tlb_end_vma tlb_end_vma -static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { } - #else /* CONFIG_MMU_GATHER_NO_RANGE */ #ifndef tlb_flush - -#if defined(tlb_start_vma) || defined(tlb_end_vma) -#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma() -#endif - /* * When an architecture does not provide its own tlb_flush() implementation * but does have a reasonably efficient flush_vma_range() implementation @@ -385,6 +390,9 @@ static inline void tlb_flush(struct mmu_gather *tlb) flush_tlb_range(&vma, tlb->start, tlb->end); } } +#endif + +#endif /* CONFIG_MMU_GATHER_NO_RANGE */ static inline void tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) @@ -402,17 +410,9 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) */ tlb->vma_huge = is_vm_hugetlb_page(vma); tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); + tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)); } -#else - -static inline void -tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } - -#endif - -#endif /* CONFIG_MMU_GATHER_NO_RANGE */ - static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { /* @@ -486,32 +486,36 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) * case where we're doing a full MM flush. When we're doing a munmap, * the vmas are adjusted to only cover the region to be torn down. */ -#ifndef tlb_start_vma static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; tlb_update_vma_flags(tlb, vma); +#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE flush_cache_range(vma, vma->vm_start, vma->vm_end); -} #endif +} -#ifndef tlb_end_vma static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; /* - * Do a TLB flush and reset the range at VMA boundaries; this avoids - * the ranges growing with the unused space between consecutive VMAs, - * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on - * this. + * VM_PFNMAP is more fragile because the core mm will not track the + * page mapcount -- there might not be page-frames for these PFNs after + * all. Force flush TLBs for such ranges to avoid munmap() vs + * unmap_mapping_range() races. */ - tlb_flush_mmu_tlbonly(tlb); + if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) { + /* + * Do a TLB flush and reset the range at VMA boundaries; this avoids + * the ranges growing with the unused space between consecutive VMAs. + */ + tlb_flush_mmu_tlbonly(tlb); + } } -#endif /* * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 0fca8f38bee4dd85481a4632cb74691e8f5a9610..addb135eeea6227459e5247dfe382f6df239a12e 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -28,7 +28,7 @@ #include #include #include -#include +#include #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000) @@ -295,7 +295,7 @@ struct drm_sched_job { */ union { struct dma_fence_cb finish_cb; - struct irq_work work; + struct work_struct work; }; uint64_t id; diff --git a/include/linux/mm.h b/include/linux/mm.h index cf3d0d673f6be67fd978ef8c0d3f277a81c132b7..7898e29bcfb5448178004f196d583d2e362b20f6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1130,23 +1130,27 @@ static inline bool is_zone_movable_page(const struct page *page) #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) DECLARE_STATIC_KEY_FALSE(devmap_managed_key); -bool __put_devmap_managed_page(struct page *page); -static inline bool put_devmap_managed_page(struct page *page) +bool __put_devmap_managed_page_refs(struct page *page, int refs); +static inline bool put_devmap_managed_page_refs(struct page *page, int refs) { if (!static_branch_unlikely(&devmap_managed_key)) return false; if (!is_zone_device_page(page)) return false; - return __put_devmap_managed_page(page); + return __put_devmap_managed_page_refs(page, refs); } - #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ -static inline bool put_devmap_managed_page(struct page *page) +static inline bool put_devmap_managed_page_refs(struct page *page, int refs) { return false; } #endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ +static inline bool put_devmap_managed_page(struct page *page) +{ + return put_devmap_managed_page_refs(page, 1); +} + /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ ((unsigned int) folio_ref_count(folio) + 127u <= 127u) diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 29917850f07946b57dfdbd8c5063220c547d5d5d..8df475db88c040456e887696ac725f1b50451c69 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -260,6 +260,7 @@ struct plat_stmmacenet_data { bool has_crossts; int int_snapshot_num; int ext_snapshot_num; + bool int_snapshot_en; bool ext_snapshot_en; bool multi_msi_en; int msi_mac_vec; diff --git a/include/net/amt.h b/include/net/amt.h index 0e40c3d64fcf4228e5d90678aa1c1daafe644725..08fc30cf2f34c48c8826cd96be07fb7c753580f7 100644 --- a/include/net/amt.h +++ b/include/net/amt.h @@ -78,6 +78,15 @@ enum amt_status { #define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1) +/* Gateway events only */ +enum amt_event { + AMT_EVENT_NONE, + AMT_EVENT_RECEIVE, + AMT_EVENT_SEND_DISCOVERY, + AMT_EVENT_SEND_REQUEST, + __AMT_EVENT_MAX, +}; + struct amt_header { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 type:4, @@ -292,6 +301,12 @@ struct amt_group_node { struct hlist_head sources[]; }; +#define AMT_MAX_EVENTS 16 +struct amt_events { + enum amt_event event; + struct sk_buff *skb; +}; + struct amt_dev { struct net_device *dev; struct net_device *stream_dev; @@ -308,6 +323,7 @@ struct amt_dev { struct delayed_work req_wq; /* Protected by RTNL */ struct delayed_work secret_wq; + struct work_struct event_wq; /* AMT status */ enum amt_status status; /* Generated key */ @@ -345,6 +361,10 @@ struct amt_dev { /* Used only in gateway mode */ u64 mac:48, reserved:16; + /* AMT gateway side message handler queue */ + struct amt_events events[AMT_MAX_EVENTS]; + u8 event_idx; + u8 nr_events; }; #define AMT_TOS 0xc0 diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ebfa3df6f8dc365b4ce5f4c4fb573c37193492ab..fd6b510d114bc103b08c932521998ed25e0c51af 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -179,7 +179,7 @@ static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index daead5fb389aea689637a447370739cd910b5248..6395f6b9a5d299115eb472bf5dfa20fe2768f0ad 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) + if (!sk->sk_mark && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) return skb->mark; return sk->sk_mark; @@ -120,7 +121,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) + if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) return l3mdev_master_ifindex_by_index(net, skb->skb_iif); #endif @@ -132,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk) #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!net->ipv4.sysctl_tcp_l3mdev_accept) + if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) return l3mdev_master_ifindex_by_index(net, sk->sk_bound_dev_if); #endif @@ -374,7 +375,7 @@ static inline bool inet_get_convert_csum(struct sock *sk) static inline bool inet_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { - return net->ipv4.sysctl_ip_nonlocal_bind || + return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || inet->freebind || inet->transparent; } diff --git a/include/net/ip.h b/include/net/ip.h index 26fffda78cca4af03f0cfc8194f34b16d99a2fc7..1c979fd1904ce371d372ce42f3359fa277202295 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -357,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { - return port < net->ipv4.sysctl_ip_prot_sock; + return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock); } #else @@ -384,7 +384,7 @@ void ipfrag_init(void); void ip_static_sysctl_init(void); #define IP4_REPLY_MARK(net, mark) \ - ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0) static inline bool ip_is_fragment(const struct iphdr *iph) { @@ -446,7 +446,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, struct net *net = dev_net(dst->dev); unsigned int mtu; - if (net->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { mtu = rt->rt_pmtu; diff --git a/include/net/protocol.h b/include/net/protocol.h index f51c06ae365f5b6cc114d737c2d8f2611b74560a..6aef8cb11cc8c409e5f7a2519f5e747be584c8d5 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -35,8 +35,6 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); - int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); /* This returns an error if we weren't able to handle the error. */ @@ -52,8 +50,6 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { - void (*early_demux)(struct sk_buff *skb); - void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); /* This returns an error if we weren't able to handle the error. */ diff --git a/include/net/route.h b/include/net/route.h index 991a3985712dcb555ff3bd08f15e7f5dad4d1469..bbcf2aba149f9529c8fdabd754161beeef86b525 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -373,7 +373,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) struct net *net = dev_net(dst->dev); if (hoplimit == 0) - hoplimit = net->ipv4.sysctl_ip_default_ttl; + hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); return hoplimit; } diff --git a/include/net/tcp.h b/include/net/tcp.h index 1e99f5c61f8499c121a9fc50643db559a6021a38..071735e10872c1d6cf603f4673f7a20965fb3a7c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -932,7 +932,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific; INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); -INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb)); +void tcp_v6_early_demux(struct sk_buff *skb); #endif @@ -1403,8 +1403,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || - ca_ops->cong_control) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || + tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) @@ -1493,21 +1493,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl; + return tp->keepalive_intvl ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); } static inline int keepalive_time_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time; + return tp->keepalive_time ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } static inline int keepalive_probes(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes; + return tp->keepalive_probes ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) @@ -1520,7 +1523,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) @@ -2023,7 +2027,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; + return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } bool tcp_stream_memory_free(const struct sock *sk, int wake); diff --git a/include/net/udp.h b/include/net/udp.h index b83a003305667d1c1cd1bac00580fec9164958b0..8dd4aa1485a695472fc0413e8b285fe81a940767 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -167,7 +167,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); -INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); +void udp_v6_early_demux(struct sk_buff *skb); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, @@ -238,7 +238,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 811897dadcae259fdc5e373f25b30fc302f4ce60..860f867c50c0e292fa31ac41388ce2bf50dab97e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -2084,7 +2084,7 @@ struct kvm_stats_header { #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) -#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN #define KVM_STATS_BASE_SHIFT 8 #define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 50ba70f019dea0996b1ec28e96191eb9e73c98ff..1c304fec89c02df9121b8cfa60087a843bf5f49c 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -511,10 +511,52 @@ static bool srcu_readers_active(struct srcu_struct *ssp) return sum; } -#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending. -#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers. -#define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive no-delay instances. -#define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay instances. +/* + * We use an adaptive strategy for synchronize_srcu() and especially for + * synchronize_srcu_expedited(). We spin for a fixed time period + * (defined below, boot time configurable) to allow SRCU readers to exit + * their read-side critical sections. If there are still some readers + * after one jiffy, we repeatedly block for one jiffy time periods. + * The blocking time is increased as the grace-period age increases, + * with max blocking time capped at 10 jiffies. + */ +#define SRCU_DEFAULT_RETRY_CHECK_DELAY 5 + +static ulong srcu_retry_check_delay = SRCU_DEFAULT_RETRY_CHECK_DELAY; +module_param(srcu_retry_check_delay, ulong, 0444); + +#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending. +#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers. + +#define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO 3UL // Lowmark on default per-GP-phase + // no-delay instances. +#define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI 1000UL // Highmark on default per-GP-phase + // no-delay instances. + +#define SRCU_UL_CLAMP_LO(val, low) ((val) > (low) ? (val) : (low)) +#define SRCU_UL_CLAMP_HI(val, high) ((val) < (high) ? (val) : (high)) +#define SRCU_UL_CLAMP(val, low, high) SRCU_UL_CLAMP_HI(SRCU_UL_CLAMP_LO((val), (low)), (high)) +// per-GP-phase no-delay instances adjusted to allow non-sleeping poll upto +// one jiffies time duration. Mult by 2 is done to factor in the srcu_get_delay() +// called from process_srcu(). +#define SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED \ + (2UL * USEC_PER_SEC / HZ / SRCU_DEFAULT_RETRY_CHECK_DELAY) + +// Maximum per-GP-phase consecutive no-delay instances. +#define SRCU_DEFAULT_MAX_NODELAY_PHASE \ + SRCU_UL_CLAMP(SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED, \ + SRCU_DEFAULT_MAX_NODELAY_PHASE_LO, \ + SRCU_DEFAULT_MAX_NODELAY_PHASE_HI) + +static ulong srcu_max_nodelay_phase = SRCU_DEFAULT_MAX_NODELAY_PHASE; +module_param(srcu_max_nodelay_phase, ulong, 0444); + +// Maximum consecutive no-delay instances. +#define SRCU_DEFAULT_MAX_NODELAY (SRCU_DEFAULT_MAX_NODELAY_PHASE > 100 ? \ + SRCU_DEFAULT_MAX_NODELAY_PHASE : 100) + +static ulong srcu_max_nodelay = SRCU_DEFAULT_MAX_NODELAY; +module_param(srcu_max_nodelay, ulong, 0444); /* * Return grace-period delay, zero if there are expedited grace @@ -522,16 +564,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp) */ static unsigned long srcu_get_delay(struct srcu_struct *ssp) { + unsigned long gpstart; + unsigned long j; unsigned long jbase = SRCU_INTERVAL; if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) jbase = 0; - if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) - jbase += jiffies - READ_ONCE(ssp->srcu_gp_start); - if (!jbase) { - WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); - if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE) - jbase = 1; + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) { + j = jiffies - 1; + gpstart = READ_ONCE(ssp->srcu_gp_start); + if (time_after(j, gpstart)) + jbase += j - gpstart; + if (!jbase) { + WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); + if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase) + jbase = 1; + } } return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase; } @@ -606,15 +654,6 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) } EXPORT_SYMBOL_GPL(__srcu_read_unlock); -/* - * We use an adaptive strategy for synchronize_srcu() and especially for - * synchronize_srcu_expedited(). We spin for a fixed time period - * (defined below) to allow SRCU readers to exit their read-side critical - * sections. If there are still some readers after a few microseconds, - * we repeatedly block for 1-millisecond time periods. - */ -#define SRCU_RETRY_CHECK_DELAY 5 - /* * Start an SRCU grace period. */ @@ -700,7 +739,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp */ static void srcu_gp_end(struct srcu_struct *ssp) { - unsigned long cbdelay; + unsigned long cbdelay = 1; bool cbs; bool last_lvl; int cpu; @@ -720,7 +759,9 @@ static void srcu_gp_end(struct srcu_struct *ssp) spin_lock_irq_rcu_node(ssp); idx = rcu_seq_state(ssp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); - cbdelay = !!srcu_get_delay(ssp); + if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) + cbdelay = 0; + WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns()); rcu_seq_end(&ssp->srcu_gp_seq); gpseq = rcu_seq_current(&ssp->srcu_gp_seq); @@ -921,12 +962,16 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, */ static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { + unsigned long curdelay; + + curdelay = !srcu_get_delay(ssp); + for (;;) { if (srcu_readers_active_idx_check(ssp, idx)) return true; - if (--trycount + !srcu_get_delay(ssp) <= 0) + if ((--trycount + curdelay) <= 0) return false; - udelay(SRCU_RETRY_CHECK_DELAY); + udelay(srcu_retry_check_delay); } } @@ -1582,7 +1627,7 @@ static void process_srcu(struct work_struct *work) j = jiffies; if (READ_ONCE(ssp->reschedule_jiffies) == j) { WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1); - if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY) + if (READ_ONCE(ssp->reschedule_count) > srcu_max_nodelay) curdelay = 1; } else { WRITE_ONCE(ssp->reschedule_count, 1); @@ -1674,6 +1719,11 @@ static int __init srcu_bootup_announce(void) pr_info("Hierarchical SRCU implementation.\n"); if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF) pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff); + if (srcu_retry_check_delay != SRCU_DEFAULT_RETRY_CHECK_DELAY) + pr_info("\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay); + if (srcu_max_nodelay != SRCU_DEFAULT_MAX_NODELAY) + pr_info("\tNon-default max no-delay of %lu.\n", srcu_max_nodelay); + pr_info("\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase); return 0; } early_initcall(srcu_bootup_announce); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index b5152961b74324f2064b364c0be1f32a751d361b..7bf561262cb86a3b87b2502b842cedcfe2115209 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1701,7 +1701,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) * the throttle. */ p->dl.dl_throttled = 0; - BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH); + if (!(flags & ENQUEUE_REPLENISH)) + printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n", + task_pid_nr(p)); + return; } diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 8b28fad1319bd0d6bdfb30db651b264b024ae64f..bb9962b33f95cec8cd6203f62fe3adf1fca1a480 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -227,7 +227,7 @@ void __post_watch_notification(struct watch_list *wlist, if (lock_wqueue(wqueue)) { post_one_notification(wqueue, n); - unlock_wqueue(wqueue);; + unlock_wqueue(wqueue); } } diff --git a/mm/gup.c b/mm/gup.c index 5512644076246d9baaa4873c9019cde4e17a2e37..e2a39e30756d5421c6cb2dc5f950a9fae98eeb4a 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -87,7 +87,8 @@ static inline struct folio *try_get_folio(struct page *page, int refs) * belongs to this folio. */ if (unlikely(page_folio(page) != folio)) { - folio_put_refs(folio, refs); + if (!put_devmap_managed_page_refs(&folio->page, refs)) + folio_put_refs(folio, refs); goto retry; } @@ -176,7 +177,8 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) refs *= GUP_PIN_COUNTING_BIAS; } - folio_put_refs(folio, refs); + if (!put_devmap_managed_page_refs(&folio->page, refs)) + folio_put_refs(folio, refs); } /** diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a57e1be41401b4105c27c7fc84bc54c1ac7cc555..a18c071c294e35c33823edf0f21dd3094afd031b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4788,8 +4788,13 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, * sharing with another vma. */ ; - } else if (unlikely(is_hugetlb_entry_migration(entry) || - is_hugetlb_entry_hwpoisoned(entry))) { + } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { + bool uffd_wp = huge_pte_uffd_wp(entry); + + if (!userfaultfd_wp(dst_vma) && uffd_wp) + entry = huge_pte_clear_uffd_wp(entry); + set_huge_pte_at(dst, addr, dst_pte, entry); + } else if (unlikely(is_hugetlb_entry_migration(entry))) { swp_entry_t swp_entry = pte_to_swp_entry(entry); bool uffd_wp = huge_pte_uffd_wp(entry); @@ -5947,6 +5952,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, page = alloc_huge_page(dst_vma, dst_addr, 0); if (IS_ERR(page)) { + put_page(*pagep); ret = -ENOMEM; *pagep = NULL; goto out; diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 4b5e5a3d3a6388cb42197459da7144552a1bbd52..6aff49f6b79ecfe000272177830621392ef9df79 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -603,14 +603,6 @@ static unsigned long kfence_init_pool(void) addr += 2 * PAGE_SIZE; } - /* - * The pool is live and will never be deallocated from this point on. - * Remove the pool object from the kmemleak object tree, as it would - * otherwise overlap with allocations returned by kfence_alloc(), which - * are registered with kmemleak through the slab post-alloc hook. - */ - kmemleak_free(__kfence_pool); - return 0; } @@ -623,8 +615,16 @@ static bool __init kfence_init_pool_early(void) addr = kfence_init_pool(); - if (!addr) + if (!addr) { + /* + * The pool is live and will never be deallocated from this point on. + * Ignore the pool object from the kmemleak phys object tree, as it would + * otherwise overlap with allocations returned by kfence_alloc(), which + * are registered with kmemleak through the slab post-alloc hook. + */ + kmemleak_ignore_phys(__pa(__kfence_pool)); return true; + } /* * Only release unprotected pages, and do not try to go back and change diff --git a/mm/memory.c b/mm/memory.c index 4cf7d4b6c950d9f4cddd2ff92d6f479b568fae44..1c6027adc5426b48f89e3e4e820e0c0144d5a579 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3043,7 +3043,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf) pte_t entry; VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); - VM_BUG_ON(PageAnon(page) && !PageAnonExclusive(page)); + VM_BUG_ON(page && PageAnon(page) && !PageAnonExclusive(page)); /* * Clear the pages cpupid information as the existing @@ -4369,9 +4369,12 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return VM_FAULT_OOM; } - /* See comment in handle_pte_fault() */ + /* + * See comment in handle_pte_fault() for how this scenario happens, we + * need to return NOPAGE so that we drop this page. + */ if (pmd_devmap_trans_unstable(vmf->pmd)) - return 0; + return VM_FAULT_NOPAGE; vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); diff --git a/mm/memremap.c b/mm/memremap.c index b870a659eee67f8edbdf620df9a10ae0a3fae77a..745eea0f99c39cd78c6edfcfed35058748613d08 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -499,7 +499,7 @@ void free_zone_device_page(struct page *page) } #ifdef CONFIG_FS_DAX -bool __put_devmap_managed_page(struct page *page) +bool __put_devmap_managed_page_refs(struct page *page, int refs) { if (page->pgmap->type != MEMORY_DEVICE_FS_DAX) return false; @@ -509,9 +509,9 @@ bool __put_devmap_managed_page(struct page *page) * refcount is 1, then the page is free and the refcount is * stable because nobody holds a reference on the page. */ - if (page_ref_dec_return(page) == 1) + if (page_ref_sub_return(page, refs) == 1) wake_up_var(&page->_refcount); return true; } -EXPORT_SYMBOL(__put_devmap_managed_page); +EXPORT_SYMBOL(__put_devmap_managed_page_refs); #endif /* CONFIG_FS_DAX */ diff --git a/mm/secretmem.c b/mm/secretmem.c index 206ed6b40c1d0fcfbb6abcbced65581c647c1892..f06279d6190a5f0eca83cb2ef5710b67077288da 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -55,22 +55,28 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf) gfp_t gfp = vmf->gfp_mask; unsigned long addr; struct page *page; + vm_fault_t ret; int err; if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) return vmf_error(-EINVAL); + filemap_invalidate_lock_shared(mapping); + retry: page = find_lock_page(mapping, offset); if (!page) { page = alloc_page(gfp | __GFP_ZERO); - if (!page) - return VM_FAULT_OOM; + if (!page) { + ret = VM_FAULT_OOM; + goto out; + } err = set_direct_map_invalid_noflush(page); if (err) { put_page(page); - return vmf_error(err); + ret = vmf_error(err); + goto out; } __SetPageUptodate(page); @@ -86,7 +92,8 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf) if (err == -EEXIST) goto retry; - return vmf_error(err); + ret = vmf_error(err); + goto out; } addr = (unsigned long)page_address(page); @@ -94,7 +101,11 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf) } vmf->page = page; - return VM_FAULT_LOCKED; + ret = VM_FAULT_LOCKED; + +out: + filemap_invalidate_unlock_shared(mapping); + return ret; } static const struct vm_operations_struct secretmem_vm_ops = { @@ -162,12 +173,20 @@ static int secretmem_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); + struct address_space *mapping = inode->i_mapping; unsigned int ia_valid = iattr->ia_valid; + int ret; + + filemap_invalidate_lock(mapping); if ((ia_valid & ATTR_SIZE) && inode->i_size) - return -EINVAL; + ret = -EINVAL; + else + ret = simple_setattr(mnt_userns, dentry, iattr); - return simple_setattr(mnt_userns, dentry, iattr); + filemap_invalidate_unlock(mapping); + + return ret; } static const struct inode_operations secretmem_iops = { diff --git a/mm/shmem.c b/mm/shmem.c index a6f56530813387b8767a0a98f51324de58a6584c..b7f2d4a5686733ee75ed2ee6c9fda01b6196f387 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3392,7 +3392,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) break; case Opt_nr_blocks: ctx->blocks = memparse(param->string, &rest); - if (*rest) + if (*rest || ctx->blocks > S64_MAX) goto bad_value; ctx->seen |= SHMEM_SEEN_BLOCKS; break; @@ -3514,10 +3514,7 @@ static int shmem_reconfigure(struct fs_context *fc) raw_spin_lock(&sbinfo->stat_lock); inodes = sbinfo->max_inodes - sbinfo->free_inodes; - if (ctx->blocks > S64_MAX) { - err = "Number of blocks too large"; - goto out; - } + if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { if (!sbinfo->max_blocks) { err = "Cannot retroactively limit size"; diff --git a/net/core/filter.c b/net/core/filter.c index 2a6a0b0ce43e49b3320954809859f39f71242f99..7950f75207658c326b50cd69ce1c2b46863aee28 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7041,7 +7041,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -EINVAL; if (!th->ack || th->rst || th->syn) @@ -7116,7 +7116,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) return -ENOENT; if (!th->syn || th->ack || th->fin || th->rst) diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 5f85e01d4093bb01dc52348204626aa29d616fcf..b0ff6153be6232c5df27a64ac6e271a546cfe6ce 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -64,7 +64,7 @@ u32 secure_tcpv6_ts_off(const struct net *net, .daddr = *(struct in6_addr *)daddr, }; - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); @@ -120,7 +120,7 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #ifdef CONFIG_INET u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { - if (net->ipv4.sysctl_tcp_timestamps != 1) + if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 3f00a28fe762affdd19eabb7777efb9b79586373..5daa1fa542490e884f427bf0a554890768b53907 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -387,7 +387,7 @@ void reuseport_stop_listen_sock(struct sock *sk) prog = rcu_dereference_protected(reuse->prog, lockdep_is_held(&reuseport_lock)); - if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req || + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) || (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) { /* Migration capable, move sk from the listening section * to the closed section. @@ -545,7 +545,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk, hash = migrating_sk->sk_hash; prog = rcu_dereference(reuse->prog); if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) { - if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req)) goto select_by_hash; goto failure; } diff --git a/net/dsa/port.c b/net/dsa/port.c index 3738f2d40a0bafbcfbb7b3e6eac93545ba8b37a9..2dd76eb1621c74b514f3e0480a688da7ff44627b 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -248,6 +248,7 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp, struct netlink_ext_ack extack = {0}; bool change_vlan_filtering = false; struct dsa_switch *ds = dp->ds; + struct dsa_port *other_dp; bool vlan_filtering; int err; @@ -270,8 +271,8 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp, * VLAN-aware bridge. */ if (change_vlan_filtering && ds->vlan_filtering_is_global) { - dsa_switch_for_each_port(dp, ds) { - struct net_device *br = dsa_port_bridge_dev_get(dp); + dsa_switch_for_each_port(other_dp, ds) { + struct net_device *br = dsa_port_bridge_dev_get(other_dp); if (br && br_vlan_enabled(br)) { change_vlan_filtering = false; @@ -799,7 +800,7 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, ds->vlan_filtering = vlan_filtering; dsa_switch_for_each_user_port(other_dp, ds) { - struct net_device *slave = dp->slave; + struct net_device *slave = other_dp->slave; /* We might be called in the unbind path, so not * all slave devices might still be registered. diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ac67f6b4ec700cc0a83aeb8a72a38096b4d0fc7f..252c8bceaba42bc01a8bf266c0acf842521e5871 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -217,7 +217,7 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). */ - tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { @@ -335,7 +335,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, inet->hdrincl = 1; } - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -1710,24 +1710,14 @@ static const struct net_protocol igmp_protocol = { }; #endif -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct net_protocol tcp_protocol = { - .early_demux = tcp_v4_early_demux, - .early_demux_handler = tcp_v4_early_demux, +static const struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, .icmp_strict_tag_validation = 1, }; -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct net_protocol udp_protocol = { - .early_demux = udp_v4_early_demux, - .early_demux_handler = udp_v4_early_demux, +static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 6eea1e9e998d5efd2d476a955282ecf56434baff..f8ad04470d3ace7f2e59ee5890467d689be52752 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -507,7 +507,7 @@ static int ah_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_ahash_digestsize(ahash)) { - pr_info("%s: %s digestsize %u != %hu\n", + pr_info("%s: %s digestsize %u != %u\n", __func__, x->aalg->alg_name, crypto_ahash_digestsize(ahash), aalg_desc->uinfo.auth.icv_fullbits / 8); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b21238df33014dacaf0c9c409101e686f7490f35..b694f352ce7a80a20bc0893a6378d8db69c42a48 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1108,7 +1108,7 @@ static int esp_init_authenc(struct xfrm_state *x) err = -EINVAL; if (aalg_desc->uinfo.auth.icv_fullbits / 8 != crypto_aead_authsize(aead)) { - pr_info("ESP: %s digestsize %u != %hu\n", + pr_info("ESP: %s digestsize %u != %u\n", x->aalg->alg_name, crypto_aead_authsize(aead), aalg_desc->uinfo.auth.icv_fullbits / 8); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d9fdcbae16ee388249f35b51747b222e091af667..db7b2503f068209c30cc0c81295a33fba168ad03 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2216,7 +2216,7 @@ void fib_select_multipath(struct fib_result *res, int hash) } change_nexthops(fi) { - if (net->ipv4.sysctl_fib_multipath_use_neigh) { + if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { if (!fib_good_nh(nexthop_nh)) continue; if (!first) { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 57c4f0d87a7a2d1ebf453f227eaa1776d992f7eb..d5d745c3e345b710d365d31a5801a006072f0e4e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -881,7 +881,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) * values please see * Documentation/networking/ip-sysctl.rst */ - switch (net->ipv4.sysctl_ip_no_pmtu_disc) { + switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) { default: net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n", &iph->daddr); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index b65d074d9620a0e2c5332c29e7741a62c3466c97..e3ab0cb616246b2b120515996e23ce1e748f2fb6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -467,7 +467,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; - if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(pmc->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return skb; mtu = READ_ONCE(dev->mtu); @@ -593,7 +594,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(pmc->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -736,7 +737,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return 0; if (type == IGMP_HOST_LEAVE_MESSAGE) @@ -825,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); igmp_ifc_start_timer(in_dev, 1); } @@ -920,7 +922,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return false; - if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(group) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return false; rcu_read_lock(); @@ -1006,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * received value was zero, use the default or statically * configured value. */ - in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; /* RFC3376, 8.3. Query Response Interval: @@ -1045,7 +1048,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; spin_lock_bh(&im->lock); if (im->tm_running) @@ -1186,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1237,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) swap(im->tomb, pmc->tomb); swap(im->sources, pmc->sources); for (psf = im->sources; psf; psf = psf->sf_next) - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } else { - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: + READ_ONCE(net->ipv4.sysctl_igmp_qrv); } in_dev_put(pmc->interface); kfree_pmc(pmc); @@ -1296,7 +1301,8 @@ static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; reporter = im->reporter; @@ -1338,13 +1344,14 @@ static void igmp_group_added(struct ip_mc_list *im) #ifdef CONFIG_IP_MULTICAST if (im->multiaddr == IGMP_ALL_HOSTS) return; - if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) + if (ipv4_is_local_multicast(im->multiaddr) && + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) return; if (in_dev->dead) return; - im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; + im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { spin_lock_bh(&im->lock); igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); @@ -1358,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im) * IN() to IN(A). */ if (im->sfmode == MCAST_EXCLUDE) - im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); igmp_ifc_event(in_dev); #endif @@ -1642,7 +1649,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) if (im->multiaddr == IGMP_ALL_HOSTS) continue; if (ipv4_is_local_multicast(im->multiaddr) && - !net->ipv4.sysctl_igmp_llm_reports) + !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports)) continue; /* a failover is happening and switches @@ -1749,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev) in_dev->mr_qi = IGMP_QUERY_INTERVAL; in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; - in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; + in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv); } #else static void ip_mc_reset(struct in_device *in_dev) @@ -1883,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1947,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2126,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -2192,7 +2199,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr, count++; } err = -ENOBUFS; - if (count >= net->ipv4.sysctl_igmp_max_memberships) + if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships)) goto done; iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); if (!iml) @@ -2379,7 +2386,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } /* else, add a new source to the filter */ - if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) { + if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { err = -ENOBUFS; goto done; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 53f5f956d9485df5cb863c8287c1fa9989bb29c9..eb31c7158b39cbcefbef4d4f3ee9ce0d8d606ca9 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -263,7 +263,7 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int * goto other_half_scan; } - if (net->ipv4.sysctl_ip_autobind_reuse && !relax) { + if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) { /* We still have a chance to connect to different destinations */ relax = true; goto ports_exhausted; @@ -833,7 +833,8 @@ static void reqsk_timer_handler(struct timer_list *t) icsk = inet_csk(sk_listener); net = sock_net(sk_listener); - max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; + max_syn_ack_retries = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_synack_retries); /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. * If synack was not acknowledged for 1 second, it means diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index e3aa436a1bdf8f5349b5ab7f8ef3c91a6cd2c3bd..e18931a6d15341d7115e608d614b16ad93f91273 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -157,7 +157,7 @@ int ip_forward(struct sk_buff *skb) !skb_sec_path(skb)) ip_rt_send_redirect(skb); - if (net->ipv4.sysctl_ip_fwd_update_priority) + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority)) skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b1165f717cd1f21d05e7ea204853ca6b05ac9703..1b512390b3cf30dd123d75a6794b993553589b09 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -312,14 +312,13 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, ip_hdr(hint)->tos == iph->tos; } -INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *)); +int tcp_v4_early_demux(struct sk_buff *skb); +int udp_v4_early_demux(struct sk_buff *skb); static int ip_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *dev, const struct sk_buff *hint) { const struct iphdr *iph = ip_hdr(skb); - int (*edemux)(struct sk_buff *skb); int err, drop_reason; struct rtable *rt; @@ -332,21 +331,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, goto drop_error; } - if (net->ipv4.sysctl_ip_early_demux && + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && !skb_dst(skb) && !skb->sk && !ip_is_fragment(iph)) { - const struct net_protocol *ipprot; - int protocol = iph->protocol; - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { - err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux, - udp_v4_early_demux, skb); - if (unlikely(err)) - goto drop_error; - /* must reload iph, skb->head might have changed */ - iph = ip_hdr(skb); + switch (iph->protocol) { + case IPPROTO_TCP: + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) { + tcp_v4_early_demux(skb); + + /* must reload iph, skb->head might have changed */ + iph = ip_hdr(skb); + } + break; + case IPPROTO_UDP: + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { + err = udp_v4_early_demux(skb); + if (unlikely(err)) + goto drop_error; + + /* must reload iph, skb->head might have changed */ + iph = ip_hdr(skb); + } + break; } } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 445a9ecaefa19b7ee92bbd98f3c164c2389a2e87..a8a323ecbb54b702e0a744e44f34ddcef7e2d383 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -782,7 +782,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; if (gsf->gf_numsrc >= 0x1ffffff || - gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) goto out_free_gsf; err = -EINVAL; @@ -832,7 +832,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; - if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf)) goto out_free_gsf; err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, &gf32->gf_group, gf32->gf_slist_flex); @@ -1244,7 +1244,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, } /* numsrc >= (1G-4) overflow in 32 bits */ if (msf->imsf_numsrc >= 0x3ffffffcU || - msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) { + msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) { kfree(msf); err = -ENOBUFS; break; @@ -1606,7 +1606,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, { struct net *net = sock_net(sk); val = (inet->uc_ttl == -1 ? - net->ipv4.sysctl_ip_default_ttl : + READ_ONCE(net->ipv4.sysctl_ip_default_ttl) : inet->uc_ttl); break; } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 918c61fda0f3000fc3e8ccbdb65652dc320c19db..d640adcaf1b125590ad60578c815b5a9ce20dfa2 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -62,7 +62,7 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); niph->tot_len = htons(nskb->len); ip_send_check(niph); @@ -117,7 +117,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); skb_reset_transport_header(nskb); icmph = skb_put_zero(nskb, sizeof(struct icmphdr)); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 28836071f0a691dc23952f0273a7faf61b1ebb2c..0088a4c64d77ed89b86b736d1c40b099a817ea27 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -387,7 +387,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) seq_printf(seq, "\nIp: %d %d", IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, - net->ipv4.sysctl_ip_default_ttl); + READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 356f535f3443b3aef35f2d8836c618272d290cc1..4702c61207a87133b3d0ef64bb40ea8adaaefec5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1398,7 +1398,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) struct fib_info *fi = res->fi; u32 mtu = 0; - if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) mtu = fi->fib_mtu; @@ -1929,7 +1929,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net, const struct sk_buff *skb, bool *p_has_inner) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) @@ -1958,7 +1958,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net, const struct sk_buff *skb, bool has_inner) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; /* We assume the packet carries an encapsulation, but if none was @@ -2018,7 +2018,7 @@ static u32 fib_multipath_custom_hash_skb(const struct net *net, static u32 fib_multipath_custom_hash_fl4(const struct net *net, const struct flowi4 *fl4) { - u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) @@ -2048,7 +2048,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, struct flow_keys hash_keys; u32 mhash = 0; - switch (net->ipv4.sysctl_fib_multipath_hash_policy) { + switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { case 0: memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b387c48351559c586f634163a229a4b75d3f04bb..942d2dfa11151170bfc72287fb8f7aeac7b5b9ad 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -247,12 +247,12 @@ bool cookie_timestamp_decode(const struct net *net, return true; } - if (!net->ipv4.sysctl_tcp_timestamps) + if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) return false; tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0; - if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack) + if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) return false; if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) @@ -261,7 +261,7 @@ bool cookie_timestamp_decode(const struct net *net, tcp_opt->wscale_ok = 1; tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; - return net->ipv4.sysctl_tcp_window_scaling != 0; + return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } EXPORT_SYMBOL(cookie_timestamp_decode); @@ -340,7 +340,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct flowi4 fl4; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 108fd86f2718d6124017aaee8f640a4df54d5529..5490c285668b93b5683328a2c284af66e2f19b0d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -84,7 +84,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, * port limit. */ if ((range[1] < range[0]) || - (range[0] < net->ipv4.sysctl_ip_prot_sock)) + (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock))) ret = -EINVAL; else set_local_port_range(net, range); @@ -110,7 +110,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, .extra2 = &ip_privileged_port_max, }; - pports = net->ipv4.sysctl_ip_prot_sock; + pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); @@ -122,7 +122,7 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write, if (range[0] < pports) ret = -EINVAL; else - net->ipv4.sysctl_ip_prot_sock = pports; + WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports); } return ret; @@ -350,61 +350,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, return ret; } -static void proc_configure_early_demux(int enabled, int protocol) -{ - struct net_protocol *ipprot; -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_protocol *ip6prot; -#endif - - rcu_read_lock(); - - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot) - ipprot->early_demux = enabled ? ipprot->early_demux_handler : - NULL; - -#if IS_ENABLED(CONFIG_IPV6) - ip6prot = rcu_dereference(inet6_protos[protocol]); - if (ip6prot) - ip6prot->early_demux = enabled ? ip6prot->early_demux_handler : - NULL; -#endif - rcu_read_unlock(); -} - -static int proc_tcp_early_demux(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = 0; - - ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); - - if (write && !ret) { - int enabled = init_net.ipv4.sysctl_tcp_early_demux; - - proc_configure_early_demux(enabled, IPPROTO_TCP); - } - - return ret; -} - -static int proc_udp_early_demux(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = 0; - - ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); - - if (write && !ret) { - int enabled = init_net.ipv4.sysctl_udp_early_demux; - - proc_configure_early_demux(enabled, IPPROTO_UDP); - } - - return ret; -} - static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -707,14 +652,14 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_udp_early_demux, .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_udp_early_demux + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_early_demux", .data = &init_net.ipv4.sysctl_tcp_early_demux, .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_tcp_early_demux + .proc_handler = proc_dou8vec_minmax, }, { .procname = "nexthop_compat_mode", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2222dfdde3168c3f940a6511bba6ec0ead6db882..2faaaaf540ac1df534d0e7ded33e07cd3897f3e8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -441,7 +441,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; + tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering); tcp_assign_congestion_control(sk); tp->tsoffset = 0; @@ -1150,7 +1150,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) || (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; @@ -3617,7 +3618,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; - } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) & + TFO_CLIENT_ENABLE) { if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else @@ -3967,12 +3969,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); break; case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ; + val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fdbcf2a6d08ef4a5164247b5a5b4b222289b191a..825b216d11f52bc79a29b7a696005175a55a6804 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -332,7 +332,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk, const struct dst_entry *dst, int flag) { - return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || + return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) || tcp_sk(sk)->fastopen_no_cookie || (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); } @@ -347,7 +347,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, const struct dst_entry *dst) { bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; - int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen); struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; int ret = 0; @@ -489,7 +489,7 @@ void tcp_fastopen_active_disable(struct sock *sk) { struct net *net = sock_net(sk); - if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)) return; /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */ @@ -510,7 +510,8 @@ void tcp_fastopen_active_disable(struct sock *sk) */ bool tcp_fastopen_active_should_disable(struct sock *sk) { - unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; + unsigned int tfo_bh_timeout = + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout); unsigned long timeout; int tfo_da_times; int multiplier; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ec4edc37313bd3d316a8947d1b184ae3efdbc66..07dbcbae7782880baaf5fbca1d344170aa032c12 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1051,7 +1051,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, tp->undo_marker ? tp->undo_retrans : 0); #endif tp->reordering = min_t(u32, (metric + mss - 1) / mss, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); } /* This exciting event is worth to be remembered. 8) */ @@ -2030,7 +2030,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) return; tp->reordering = min_t(u32, tp->packets_out + addend, - sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering)); tp->reord_seen++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } @@ -2095,7 +2095,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp) static bool tcp_is_rack(const struct sock *sk) { - return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION; + return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_LOSS_DETECTION; } /* If we detect SACK reneging, forget all SACK information @@ -2139,6 +2140,7 @@ void tcp_enter_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; + u8 reordering; tcp_timeout_mark_lost(sk); @@ -2159,10 +2161,12 @@ void tcp_enter_loss(struct sock *sk) /* Timeout in disordered state after receiving substantial DUPACKs * suggests that the degree of reordering is over-estimated. */ + reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering); if (icsk->icsk_ca_state <= TCP_CA_Disorder && - tp->sacked_out >= net->ipv4.sysctl_tcp_reordering) + tp->sacked_out >= reordering) tp->reordering = min_t(unsigned int, tp->reordering, - net->ipv4.sysctl_tcp_reordering); + reordering); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); @@ -3464,7 +3468,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * new SACK or ECE mark may first advance cwnd here and later reduce * cwnd in tcp_fastretrans_alert() based on more states. */ - if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering) + if (tcp_sk(sk)->reordering > + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering)) return flag & FLAG_FORWARD_PROGRESS; return flag & FLAG_DATA_ACKED; @@ -4056,7 +4061,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && net->ipv4.sysctl_tcp_window_scaling) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > TCP_MAX_WSCALE) { @@ -4072,7 +4077,7 @@ void tcp_parse_options(const struct net *net, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && net->ipv4.sysctl_tcp_timestamps))) { + (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -4080,7 +4085,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && net->ipv4.sysctl_tcp_sack) { + !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) { opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } @@ -5567,7 +5572,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); - if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) + if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg)) ptr--; ptr += ntohl(th->seq); @@ -6797,11 +6802,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; - bool want_cookie = false; struct net *net = sock_net(sk); + bool want_cookie = false; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); #ifdef CONFIG_SYN_COOKIES - if (net->ipv4.sysctl_tcp_syncookies) { + if (syncookies) { msg = "Sending cookies"; want_cookie = true; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); @@ -6809,8 +6817,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) #endif __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - if (!queue->synflood_warned && - net->ipv4.sysctl_tcp_syncookies != 2 && + if (!queue->synflood_warned && syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, sk->sk_num, msg); @@ -6859,7 +6866,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, struct tcp_sock *tp = tcp_sk(sk); u16 mss; - if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 && !inet_csk_reqsk_queue_is_full(sk)) return 0; @@ -6893,13 +6900,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, bool want_cookie = false; struct dst_entry *dst; struct flowi fl; + u8 syncookies; + + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is * evidently real one. */ - if ((net->ipv4.sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { + if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; @@ -6948,10 +6957,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); if (!want_cookie && !isn) { + int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); + /* Kill the following clause, if you dislike this way. */ - if (!net->ipv4.sysctl_tcp_syncookies && - (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (net->ipv4.sysctl_max_syn_backlog >> 2)) && + if (!syncookies && + (max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index da5a3c44c4fb70f1d3ecc596e694a86267f1c44a..d16e6e40f47ba4aee2b1a412c42d2f9791698e27 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -108,10 +108,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) { + int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse); const struct inet_timewait_sock *tw = inet_twsk(sktw); const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); struct tcp_sock *tp = tcp_sk(sk); - int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse; if (reuse == 2) { /* Still does not detect *everything* that goes through diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 7029b0e98edb285102dcab4521f296511a70dc57..a501150deaa3b326f1cffdbdd7924d856dde6682 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -428,7 +428,8 @@ void tcp_update_metrics(struct sock *sk) if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val < tp->reordering && - tp->reordering != net->ipv4.sysctl_tcp_reordering) + tp->reordering != + READ_ONCE(net->ipv4.sysctl_tcp_reordering)) tcp_metric_set(tm, TCP_METRIC_REORDERING, tp->reordering); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6854bb1fb32b265ef4c0838267bf272c57f7601e..cb95d88497aeae8802b0c42c83fae67352ef4d98 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -173,7 +173,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { + if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; @@ -781,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (sk != req->rsk_listener) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); - if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) { inet_rsk(req)->acked = 1; return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 11aa0ab10bbad17142b06baa7eecd7d569b33131..c38e07b50639c7f9ac11975d1750d6a0099d74e4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -791,18 +791,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) { + if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; @@ -1719,7 +1719,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ - mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); + mss_now = max(mss_now, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss)); return mss_now; } @@ -1762,10 +1763,10 @@ void tcp_mtup_init(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct net *net = sock_net(sk); - icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; + icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss)); icsk->icsk_mtup.probe_size = 0; if (icsk->icsk_mtup.enabled) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; @@ -1897,7 +1898,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) && (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); @@ -2282,7 +2283,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) u32 interval; s32 delta; - interval = net->ipv4.sysctl_tcp_probe_interval; + interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval); delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); @@ -2366,7 +2367,7 @@ static int tcp_mtu_probe(struct sock *sk) * probing process by not resetting search range to its orignal. */ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || - interval < net->ipv4.sysctl_tcp_probe_threshold) { + interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) { /* Check whether enough time has elaplased for * another round of probing. */ @@ -2740,7 +2741,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) if (rcu_access_pointer(tp->fastopen_rsk)) return false; - early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; + early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans); /* Schedule a loss probe in 2*RTT for SACK capable connections * not in loss recovery, that are either limited by cwnd or application. */ @@ -3104,7 +3105,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, struct sk_buff *skb = to, *tmp; bool first = true; - if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)) return; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; @@ -3646,7 +3647,7 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr); - if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; #ifdef CONFIG_TCP_MD5SIG @@ -3682,7 +3683,7 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sock_net(sk)->ipv4.sysctl_tcp_window_scaling, + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling), &rcv_wscale, rcv_wnd); @@ -4089,7 +4090,7 @@ void tcp_send_probe0(struct sock *sk) icsk->icsk_probes_out++; if (err <= 0) { - if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) + if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) icsk->icsk_backoff++; timeout = tcp_probe0_when(sk, TCP_RTO_MAX); } else { diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 48f30e7209f291c9191074e5f4a11501a374961c..50abaa941387d3e7328b6859ea5118937fc12be4 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -14,7 +14,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk) return 0; if (tp->sacked_out >= tp->reordering && - !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH)) + !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_NO_DUPTHRESH)) return 0; } @@ -187,7 +188,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); - if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || + if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & + TCP_RACK_STATIC_REO_WND) || !rs->prior_delivered) return; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 20cf4a98c69d85d07c884d7bc8316191ff962bd8..50bba370486e83a80c562875ecb564d6f7186270 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -143,7 +143,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) */ static int tcp_orphan_retries(struct sock *sk, bool alive) { - int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */ + int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ if (sk->sk_err_soft && !alive) @@ -163,7 +163,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) int mss; /* Black hole detection */ - if (!net->ipv4.sysctl_tcp_mtu_probing) + if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing)) return; if (!icsk->icsk_mtup.enabled) { @@ -171,9 +171,9 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(net->ipv4.sysctl_tcp_base_mss, mss); - mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); - mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); + mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor)); + mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss)); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -239,17 +239,18 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) __dst_negative_advice(sk); - retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + retry_until = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); expired = icsk->icsk_retransmits >= retry_until; } else { - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); __dst_negative_advice(sk); } - retry_until = net->ipv4.sysctl_tcp_retries2; + retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < TCP_RTO_MAX; @@ -380,7 +381,7 @@ static void tcp_probe_timer(struct sock *sk) msecs_to_jiffies(icsk->icsk_user_timeout)) goto abort; - max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; + max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; @@ -406,12 +407,15 @@ abort: tcp_write_err(sk); static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) { struct inet_connection_sock *icsk = inet_csk(sk); - int max_retries = icsk->icsk_syn_retries ? : - sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ struct tcp_sock *tp = tcp_sk(sk); + int max_retries; req->rsk_ops->syn_ack_timeout(req); + /* add one more retry for fastopen */ + max_retries = icsk->icsk_syn_retries ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1; + if (req->num_timeout >= max_retries) { tcp_write_err(sk); return; @@ -574,7 +578,7 @@ void tcp_retransmit_timer(struct sock *sk) * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && - (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && + (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; @@ -585,7 +589,7 @@ void tcp_retransmit_timer(struct sock *sk) } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX); - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0)) + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); out:; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 70564ddccc4677d0e091ef6ae747b001be4bd1aa..6f354f8be2c57538eef832aee926f225c27aa72c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -226,7 +226,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, RCU_INIT_POINTER(inet->mc_list, NULL); inet->rcv_tos = 0; - if (net->ipv4.sysctl_ip_no_pmtu_disc) + if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 0322cc86b84eaaed7529a4b65fdfba4c97a38375..e1ebf5e42ebe9ac39c13f109b03853c557ff2718 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -45,20 +45,23 @@ #include #include -INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) { - void (*edemux)(struct sk_buff *skb); - - if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { - const struct inet6_protocol *ipprot; - - ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) - INDIRECT_CALL_2(edemux, tcp_v6_early_demux, - udp_v6_early_demux, skb); + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && + !skb_dst(skb) && !skb->sk) { + switch (ipv6_hdr(skb)->nexthdr) { + case IPPROTO_TCP: + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) + tcp_v6_early_demux(skb); + break; + case IPPROTO_UDP: + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) + udp_v6_early_demux(skb); + break; + } } + if (!skb_valid_dst(skb)) ip6_route_input(skb); } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 9cc123f000fbcfbeff7728bfee5339d6dd6470f9..5014aa663452763b9895b2da0579367bfb3d0430 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -141,7 +141,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; u32 tsoff = 0; - if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f37dd4aa91c6bae1df92a1e4edca362c50cd97b9..9d3ede2932582e716d9fa15dd2fad7795ea1b993 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1822,7 +1822,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) goto discard_it; } -INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) +void tcp_v6_early_demux(struct sk_buff *skb) { const struct ipv6hdr *hdr; const struct tcphdr *th; @@ -2176,12 +2176,7 @@ struct proto tcpv6_prot = { }; EXPORT_SYMBOL_GPL(tcpv6_prot); -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct inet6_protocol tcpv6_protocol = { - .early_demux = tcp_v6_early_demux, - .early_demux_handler = tcp_v6_early_demux, +static const struct inet6_protocol tcpv6_protocol = { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 55afd7f39c0450ff442a0499b7f8e42bf1a613bc..e2f2e087a7531d57cf965a9fa3dd77e35b879d06 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1052,7 +1052,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, return NULL; } -INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) +void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; @@ -1660,12 +1660,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, return ipv6_getsockopt(sk, level, optname, optval, optlen); } -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct inet6_protocol udpv6_protocol = { - .early_demux = udp_v6_early_demux, - .early_demux_handler = udp_v6_early_demux, +static const struct inet6_protocol udpv6_protocol = { .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index e479dd0561c54d0c1a67ffdc3506739fbcd7c434..16915f8eef2b16eec7da7ecaaf8b4a4af5dd94e2 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -405,7 +405,7 @@ synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, iph->tos = 0; iph->id = 0; iph->frag_off = htons(IP_DF); - iph->ttl = net->ipv4.sysctl_ip_default_ttl; + iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); iph->protocol = IPPROTO_TCP; iph->check = 0; iph->saddr = saddr; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9bb4d3dcc994fb259c12cfad3fd84f0f164ab1eb..ac366c99086fd3b9f0721c93c53cf930f4e495c5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3533,7 +3533,7 @@ int tc_setup_action(struct flow_action *flow_action, struct tc_action *actions[], struct netlink_ext_ack *extack) { - int i, j, index, err = 0; + int i, j, k, index, err = 0; struct tc_action *act; BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY); @@ -3553,14 +3553,18 @@ int tc_setup_action(struct flow_action *flow_action, if (err) goto err_out_locked; - entry->hw_stats = tc_act_hw_stats(act->hw_stats); - entry->hw_index = act->tcfa_index; index = 0; err = tc_setup_offload_act(act, entry, &index, extack); - if (!err) - j += index; - else + if (err) goto err_out_locked; + + for (k = 0; k < index ; k++) { + entry[k].hw_stats = tc_act_hw_stats(act->hw_stats); + entry[k].hw_index = act->tcfa_index; + } + + j += index; + spin_unlock_bh(&act->tcfa_lock); } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 35928fefae3327f97688f0857de63bc17e3429d6..1a094b087d88bdcc5cf213c5ecd0a053a9b1b8c6 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -358,7 +358,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && ret != RTN_LOCAL && !sp->inet.freebind && - !net->ipv4.sysctl_ip_nonlocal_bind) + !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind)) return 0; if (ipv6_only_sock(sctp_opt2sk(sp))) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index c4d057b2941d51cd87ae02caa7141cd1a4a3be42..0bde36b564727960352478e24a9c293f09a6189d 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -2122,7 +2122,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) init_waitqueue_head(&lgr->llc_flow_waiter); init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); - lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time; + lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } /* called after lgr was removed from lgr_list */ diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index ce827e79c66a41d46333efbffd1c57ebf825ee03..879b9024678edf37380e032508ee9c598194c49c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -97,13 +97,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx) unsigned long flags; spin_lock_irqsave(&tls_device_lock, flags); + if (unlikely(!refcount_dec_and_test(&ctx->refcount))) + goto unlock; + list_move_tail(&ctx->list, &tls_device_gc_list); /* schedule_work inside the spinlock * to make sure tls_device_down waits for that work. */ schedule_work(&tls_device_gc_work); - +unlock: spin_unlock_irqrestore(&tls_device_lock, flags); } @@ -194,8 +197,7 @@ void tls_device_sk_destruct(struct sock *sk) clean_acked_data_disable(inet_csk(sk)); } - if (refcount_dec_and_test(&tls_ctx->refcount)) - tls_device_queue_ctx_destruction(tls_ctx); + tls_device_queue_ctx_destruction(tls_ctx); } EXPORT_SYMBOL_GPL(tls_device_sk_destruct); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f1876ea61fdce29d15be13c9635b0d6cf7c90587..f1a0bab920a5580a507932974a86f96c08113759 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2678,8 +2678,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, *num_xfrms = 0; return 0; } - if (IS_ERR(pols[0])) + if (IS_ERR(pols[0])) { + *num_pols = 0; return PTR_ERR(pols[0]); + } *num_xfrms = pols[0]->xfrm_nr; @@ -2694,6 +2696,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, if (pols[1]) { if (IS_ERR(pols[1])) { xfrm_pols_put(pols, *num_pols); + *num_pols = 0; return PTR_ERR(pols[1]); } (*num_pols)++; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 08564e0eef2037bc13016066a56e864acee89900..ccfb172eb5b8d4f2a1561ecc3dc9b54635011ddb 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2620,7 +2620,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) int err; if (family == AF_INET && - xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc) + READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)) x->props.flags |= XFRM_STATE_NOPMTUDISC; err = -EPROTONOSUPPORT; diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 46f7542db08cee435642678386bb9a2f2dd76551..dc07b6d12e300ea72d8ba4b1cd6c33b5c9b6936c 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -180,7 +180,7 @@ lx-symbols command.""" self.breakpoint.delete() self.breakpoint = None self.breakpoint = LoadModuleBreakpoint( - "kernel/module.c:do_init_module", self) + "kernel/module/main.c:do_init_module", self) else: gdb.write("Note: symbol update on module loading not supported " "with this gdb version\n") diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 99a128a666fbb00533f638a339cdab1c535e5f1d..4ce5d257938753a1818958a0414359a789d28027 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -55,40 +54,8 @@ struct rk_i2s_dev { const struct rk_i2s_pins *pins; unsigned int bclk_ratio; spinlock_t lock; /* tx/rx lock */ - struct pinctrl *pinctrl; - struct pinctrl_state *bclk_on; - struct pinctrl_state *bclk_off; }; -static int i2s_pinctrl_select_bclk_on(struct rk_i2s_dev *i2s) -{ - int ret = 0; - - if (!IS_ERR(i2s->pinctrl) && !IS_ERR_OR_NULL(i2s->bclk_on)) - ret = pinctrl_select_state(i2s->pinctrl, - i2s->bclk_on); - - if (ret) - dev_err(i2s->dev, "bclk enable failed %d\n", ret); - - return ret; -} - -static int i2s_pinctrl_select_bclk_off(struct rk_i2s_dev *i2s) -{ - - int ret = 0; - - if (!IS_ERR(i2s->pinctrl) && !IS_ERR_OR_NULL(i2s->bclk_off)) - ret = pinctrl_select_state(i2s->pinctrl, - i2s->bclk_off); - - if (ret) - dev_err(i2s->dev, "bclk disable failed %d\n", ret); - - return ret; -} - static int i2s_runtime_suspend(struct device *dev) { struct rk_i2s_dev *i2s = dev_get_drvdata(dev); @@ -125,49 +92,38 @@ static inline struct rk_i2s_dev *to_info(struct snd_soc_dai *dai) return snd_soc_dai_get_drvdata(dai); } -static int rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on) +static void rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on) { unsigned int val = 0; int retry = 10; - int ret = 0; spin_lock(&i2s->lock); if (on) { - ret = regmap_update_bits(i2s->regmap, I2S_DMACR, - I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE); - if (ret < 0) - goto end; + regmap_update_bits(i2s->regmap, I2S_DMACR, + I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE); - ret = regmap_update_bits(i2s->regmap, I2S_XFER, - I2S_XFER_TXS_START | I2S_XFER_RXS_START, - I2S_XFER_TXS_START | I2S_XFER_RXS_START); - if (ret < 0) - goto end; + regmap_update_bits(i2s->regmap, I2S_XFER, + I2S_XFER_TXS_START | I2S_XFER_RXS_START, + I2S_XFER_TXS_START | I2S_XFER_RXS_START); i2s->tx_start = true; } else { i2s->tx_start = false; - ret = regmap_update_bits(i2s->regmap, I2S_DMACR, - I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_DISABLE); - if (ret < 0) - goto end; + regmap_update_bits(i2s->regmap, I2S_DMACR, + I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_DISABLE); if (!i2s->rx_start) { - ret = regmap_update_bits(i2s->regmap, I2S_XFER, - I2S_XFER_TXS_START | - I2S_XFER_RXS_START, - I2S_XFER_TXS_STOP | - I2S_XFER_RXS_STOP); - if (ret < 0) - goto end; + regmap_update_bits(i2s->regmap, I2S_XFER, + I2S_XFER_TXS_START | + I2S_XFER_RXS_START, + I2S_XFER_TXS_STOP | + I2S_XFER_RXS_STOP); udelay(150); - ret = regmap_update_bits(i2s->regmap, I2S_CLR, - I2S_CLR_TXC | I2S_CLR_RXC, - I2S_CLR_TXC | I2S_CLR_RXC); - if (ret < 0) - goto end; + regmap_update_bits(i2s->regmap, I2S_CLR, + I2S_CLR_TXC | I2S_CLR_RXC, + I2S_CLR_TXC | I2S_CLR_RXC); regmap_read(i2s->regmap, I2S_CLR, &val); @@ -182,57 +138,44 @@ static int rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on) } } } -end: spin_unlock(&i2s->lock); - if (ret < 0) - dev_err(i2s->dev, "lrclk update failed\n"); - - return ret; } -static int rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on) +static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on) { unsigned int val = 0; int retry = 10; - int ret = 0; spin_lock(&i2s->lock); if (on) { - ret = regmap_update_bits(i2s->regmap, I2S_DMACR, + regmap_update_bits(i2s->regmap, I2S_DMACR, I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_ENABLE); - if (ret < 0) - goto end; - ret = regmap_update_bits(i2s->regmap, I2S_XFER, + regmap_update_bits(i2s->regmap, I2S_XFER, I2S_XFER_TXS_START | I2S_XFER_RXS_START, I2S_XFER_TXS_START | I2S_XFER_RXS_START); - if (ret < 0) - goto end; i2s->rx_start = true; } else { i2s->rx_start = false; - ret = regmap_update_bits(i2s->regmap, I2S_DMACR, + regmap_update_bits(i2s->regmap, I2S_DMACR, I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_DISABLE); - if (ret < 0) - goto end; if (!i2s->tx_start) { - ret = regmap_update_bits(i2s->regmap, I2S_XFER, + regmap_update_bits(i2s->regmap, I2S_XFER, I2S_XFER_TXS_START | I2S_XFER_RXS_START, I2S_XFER_TXS_STOP | I2S_XFER_RXS_STOP); - if (ret < 0) - goto end; + udelay(150); - ret = regmap_update_bits(i2s->regmap, I2S_CLR, + regmap_update_bits(i2s->regmap, I2S_CLR, I2S_CLR_TXC | I2S_CLR_RXC, I2S_CLR_TXC | I2S_CLR_RXC); - if (ret < 0) - goto end; + regmap_read(i2s->regmap, I2S_CLR, &val); + /* Should wait for clear operation to finish */ while (val) { regmap_read(i2s->regmap, I2S_CLR, &val); @@ -244,12 +187,7 @@ static int rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on) } } } -end: spin_unlock(&i2s->lock); - if (ret < 0) - dev_err(i2s->dev, "lrclk update failed\n"); - - return ret; } static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai, @@ -487,26 +425,17 @@ static int rockchip_i2s_trigger(struct snd_pcm_substream *substream, case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) - ret = rockchip_snd_rxctrl(i2s, 1); + rockchip_snd_rxctrl(i2s, 1); else - ret = rockchip_snd_txctrl(i2s, 1); - /* Do not turn on bclk if lrclk open fails. */ - if (ret < 0) - return ret; - i2s_pinctrl_select_bclk_on(i2s); + rockchip_snd_txctrl(i2s, 1); break; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { - if (!i2s->tx_start) - i2s_pinctrl_select_bclk_off(i2s); - ret = rockchip_snd_rxctrl(i2s, 0); - } else { - if (!i2s->rx_start) - i2s_pinctrl_select_bclk_off(i2s); - ret = rockchip_snd_txctrl(i2s, 0); - } + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) + rockchip_snd_rxctrl(i2s, 0); + else + rockchip_snd_txctrl(i2s, 0); break; default: ret = -EINVAL; @@ -807,33 +736,6 @@ static int rockchip_i2s_probe(struct platform_device *pdev) } i2s->bclk_ratio = 64; - i2s->pinctrl = devm_pinctrl_get(&pdev->dev); - if (IS_ERR(i2s->pinctrl)) - dev_err(&pdev->dev, "failed to find i2s pinctrl\n"); - - i2s->bclk_on = pinctrl_lookup_state(i2s->pinctrl, - "bclk_on"); - if (IS_ERR_OR_NULL(i2s->bclk_on)) - dev_err(&pdev->dev, "failed to find i2s default state\n"); - else - dev_dbg(&pdev->dev, "find i2s bclk state\n"); - - i2s->bclk_off = pinctrl_lookup_state(i2s->pinctrl, - "bclk_off"); - if (IS_ERR_OR_NULL(i2s->bclk_off)) - dev_err(&pdev->dev, "failed to find i2s gpio state\n"); - else - dev_dbg(&pdev->dev, "find i2s bclk_off state\n"); - - i2s_pinctrl_select_bclk_off(i2s); - - i2s->playback_dma_data.addr = res->start + I2S_TXDR; - i2s->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - i2s->playback_dma_data.maxburst = 4; - - i2s->capture_dma_data.addr = res->start + I2S_RXDR; - i2s->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - i2s->capture_dma_data.maxburst = 4; dev_set_drvdata(&pdev->dev, i2s); diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 811897dadcae259fdc5e373f25b30fc302f4ce60..860f867c50c0e292fa31ac41388ce2bf50dab97e 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -2084,7 +2084,7 @@ struct kvm_stats_header { #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) -#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN #define KVM_STATS_BASE_SHIFT 8 #define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 71b3066023685fc3df4f17d727de85f821c4ba3a..616ed40196554eda9fa746b4d536ab220a0f6c52 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -3,6 +3,6 @@ TEST_PROGS := gpio-mockup.sh gpio-sim.sh TEST_FILES := gpio-mockup-sysfs.sh TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name -CFLAGS += -O2 -g -Wall -I../../../../usr/include/ +CFLAGS += -O2 -g -Wall -I../../../../usr/include/ $(KHDR_INCLUDES) include ../lib.mk diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 4158da0da2bba8e5e8ea12acd87f6cff6206e3ae..2237d1aac801411bd1b3f3cadb1324d6603c03a8 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -82,8 +82,9 @@ static int next_cpu(int cpu) return cpu; } -static void *migration_worker(void *ign) +static void *migration_worker(void *__rseq_tid) { + pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid; cpu_set_t allowed_mask; int r, i, cpu; @@ -106,7 +107,7 @@ static void *migration_worker(void *ign) * stable, i.e. while changing affinity is in-progress. */ smp_wmb(); - r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask); + r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask); TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)", errno, strerror(errno)); smp_wmb(); @@ -231,7 +232,8 @@ int main(int argc, char *argv[]) vm = vm_create_default(VCPU_ID, 0, guest_code); ucall_init(vm, NULL); - pthread_create(&migration_thread, NULL, migration_worker, 0); + pthread_create(&migration_thread, NULL, migration_worker, + (void *)(unsigned long)gettid()); for (i = 0; !done; i++) { vcpu_run(vm, VCPU_ID);