From gregkh@suse.de Wed Nov 9 12:08:06 2005 Return-Path: X-Original-To: lkml@kolivas.org Delivered-To: lkml@kolivas.org Received: from bhhdoa.org.au (bhhdoa.org.au [65.98.99.88]) by mail.kolivas.org (Postfix) with ESMTP id 60BBBC5C96 for ; Wed, 9 Nov 2005 12:09:16 +1100 (EST) Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by bhhdoa.org.au (Postfix) with ESMTP id 5C9BE515E9 for ; Wed, 9 Nov 2005 10:02:07 +1100 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1030492AbVKIBIg (ORCPT ); Tue, 8 Nov 2005 20:08:36 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1030495AbVKIBIg (ORCPT ); Tue, 8 Nov 2005 20:08:36 -0500 Received: from mail.kroah.org ([69.55.234.183]:22475 "EHLO perch.kroah.org") by vger.kernel.org with ESMTP id S1030492AbVKIBIe (ORCPT ); Tue, 8 Nov 2005 20:08:34 -0500 Received: from [192.168.0.10] (c-24-22-115-24.hsd1.or.comcast.net [24.22.115.24]) (authenticated) by perch.kroah.org (8.11.6/8.11.6) with ESMTP id jA918VL21087; Tue, 8 Nov 2005 17:08:31 -0800 Received: from greg by echidna.kroah.org with local (masqmail 0.2.19) id 1EZeRa-5rx-00; Tue, 08 Nov 2005 17:08:06 -0800 Date: Tue, 8 Nov 2005 17:08:06 -0800 From: Greg KH To: linux-kernel@vger.kernel.org, stable@kernel.org Cc: torvalds@osdl.org Subject: Re: Linux 2.6.14.1 Message-ID: <20051109010806.GB22439@kroah.com> References: <20051109010729.GA22439@kroah.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20051109010729.GA22439@kroah.com> User-Agent: Mutt/1.5.11 Sender: linux-kernel-owner@vger.kernel.org Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org Status: RO X-Status: UC X-KMail-EncryptionState: X-KMail-SignatureState: X-KMail-MDN-Sent: diff --git a/Makefile b/Makefile index 1fa7e53..e5315e6 100644 Index: linux-2.6.14-ck4/arch/s390/appldata/appldata_base.c =================================================================== --- linux-2.6.14-ck4.orig/arch/s390/appldata/appldata_base.c 2005-11-09 20:58:02.000000000 +1100 +++ linux-2.6.14-ck4/arch/s390/appldata/appldata_base.c 2005-11-09 20:58:29.000000000 +1100 @@ -592,12 +592,15 @@ int appldata_register_ops(struct appldat */ void appldata_unregister_ops(struct appldata_ops *ops) { + void *table; spin_lock(&appldata_ops_lock); - unregister_sysctl_table(ops->sysctl_header); list_del(&ops->list); - kfree(ops->ctl_table); + /* at that point any incoming access will fail */ + table = ops->ctl_table; ops->ctl_table = NULL; spin_unlock(&appldata_ops_lock); + unregister_sysctl_table(ops->sysctl_header); + kfree(table); P_INFO("%s-ops unregistered!\n", ops->name); } /********************** module-ops management **************************/ Index: linux-2.6.14-ck4/include/linux/proc_fs.h =================================================================== --- linux-2.6.14-ck4.orig/include/linux/proc_fs.h 2005-11-09 20:58:02.000000000 +1100 +++ linux-2.6.14-ck4/include/linux/proc_fs.h 2005-11-09 20:58:29.000000000 +1100 @@ -66,6 +66,7 @@ struct proc_dir_entry { write_proc_t *write_proc; atomic_t count; /* use count */ int deleted; /* delete flag */ + void *set; }; struct kcore_list { Index: linux-2.6.14-ck4/include/linux/sysctl.h =================================================================== --- linux-2.6.14-ck4.orig/include/linux/sysctl.h 2005-11-09 20:58:26.000000000 +1100 +++ linux-2.6.14-ck4/include/linux/sysctl.h 2005-11-09 20:58:29.000000000 +1100 @@ -24,6 +24,7 @@ #include struct file; +struct completion; #define CTL_MAXNAME 10 /* how many path components do we allow in a call to sysctl? In other words, what is @@ -930,6 +931,8 @@ struct ctl_table_header { ctl_table *ctl_table; struct list_head ctl_entry; + int used; + struct completion *unregistering; }; struct ctl_table_header * register_sysctl_table(ctl_table * table, Index: linux-2.6.14-ck4/kernel/sysctl.c =================================================================== --- linux-2.6.14-ck4.orig/kernel/sysctl.c 2005-11-09 20:58:26.000000000 +1100 +++ linux-2.6.14-ck4/kernel/sysctl.c 2005-11-09 20:58:29.000000000 +1100 @@ -169,7 +169,7 @@ struct file_operations proc_sys_file_ope extern struct proc_dir_entry *proc_sys_root; -static void register_proc_table(ctl_table *, struct proc_dir_entry *); +static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); #endif @@ -1038,10 +1038,51 @@ static ctl_table dev_table[] = { extern void init_irq_proc (void); +static DEFINE_SPINLOCK(sysctl_lock); + +/* called under sysctl_lock */ +static int use_table(struct ctl_table_header *p) +{ + if (unlikely(p->unregistering)) + return 0; + p->used++; + return 1; +} + +/* called under sysctl_lock */ +static void unuse_table(struct ctl_table_header *p) +{ + if (!--p->used) + if (unlikely(p->unregistering)) + complete(p->unregistering); +} + +/* called under sysctl_lock, will reacquire if has to wait */ +static void start_unregistering(struct ctl_table_header *p) +{ + /* + * if p->used is 0, nobody will ever touch that entry again; + * we'll eliminate all paths to it before dropping sysctl_lock + */ + if (unlikely(p->used)) { + struct completion wait; + init_completion(&wait); + p->unregistering = &wait; + spin_unlock(&sysctl_lock); + wait_for_completion(&wait); + spin_lock(&sysctl_lock); + } + /* + * do not remove from the list until nobody holds it; walking the + * list in do_sysctl() relies on that. + */ + list_del_init(&p->ctl_entry); +} + void __init sysctl_init(void) { #ifdef CONFIG_PROC_FS - register_proc_table(root_table, proc_sys_root); + register_proc_table(root_table, proc_sys_root, &root_table_header); init_irq_proc(); #endif } @@ -1050,6 +1091,7 @@ int do_sysctl(int __user *name, int nlen void __user *newval, size_t newlen) { struct list_head *tmp; + int error = -ENOTDIR; if (nlen <= 0 || nlen >= CTL_MAXNAME) return -ENOTDIR; @@ -1058,20 +1100,30 @@ int do_sysctl(int __user *name, int nlen if (!oldlenp || get_user(old_len, oldlenp)) return -EFAULT; } + spin_lock(&sysctl_lock); tmp = &root_table_header.ctl_entry; do { struct ctl_table_header *head = list_entry(tmp, struct ctl_table_header, ctl_entry); void *context = NULL; - int error = parse_table(name, nlen, oldval, oldlenp, + + if (!use_table(head)) + continue; + + spin_unlock(&sysctl_lock); + + error = parse_table(name, nlen, oldval, oldlenp, newval, newlen, head->ctl_table, &context); kfree(context); + + spin_lock(&sysctl_lock); + unuse_table(head); if (error != -ENOTDIR) - return error; - tmp = tmp->next; - } while (tmp != &root_table_header.ctl_entry); - return -ENOTDIR; + break; + } while ((tmp = tmp->next) != &root_table_header.ctl_entry); + spin_unlock(&sysctl_lock); + return error; } asmlinkage long sys_sysctl(struct __sysctl_args __user *args) @@ -1282,12 +1334,16 @@ struct ctl_table_header *register_sysctl return NULL; tmp->ctl_table = table; INIT_LIST_HEAD(&tmp->ctl_entry); + tmp->used = 0; + tmp->unregistering = NULL; + spin_lock(&sysctl_lock); if (insert_at_head) list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); else list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); + spin_unlock(&sysctl_lock); #ifdef CONFIG_PROC_FS - register_proc_table(table, proc_sys_root); + register_proc_table(table, proc_sys_root, tmp); #endif return tmp; } @@ -1301,10 +1357,13 @@ struct ctl_table_header *register_sysctl */ void unregister_sysctl_table(struct ctl_table_header * header) { - list_del(&header->ctl_entry); + might_sleep(); + spin_lock(&sysctl_lock); + start_unregistering(header); #ifdef CONFIG_PROC_FS unregister_proc_table(header->ctl_table, proc_sys_root); #endif + spin_unlock(&sysctl_lock); kfree(header); } @@ -1315,7 +1374,7 @@ void unregister_sysctl_table(struct ctl_ #ifdef CONFIG_PROC_FS /* Scan the sysctl entries in table and add them all into /proc */ -static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) +static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) { struct proc_dir_entry *de; int len; @@ -1351,13 +1410,14 @@ static void register_proc_table(ctl_tabl de = create_proc_entry(table->procname, mode, root); if (!de) continue; + de->set = set; de->data = (void *) table; if (table->proc_handler) de->proc_fops = &proc_sys_file_operations; } table->de = de; if (de->mode & S_IFDIR) - register_proc_table(table->child, de); + register_proc_table(table->child, de, set); } } @@ -1382,6 +1442,13 @@ static void unregister_proc_table(ctl_ta continue; } + /* + * In any case, mark the entry as goner; we'll keep it + * around if it's busy, but we'll know to do nothing with + * its fields. We are under sysctl_lock here. + */ + de->data = NULL; + /* Don't unregister proc entries that are still being used.. */ if (atomic_read(&de->count)) continue; @@ -1395,27 +1462,38 @@ static ssize_t do_rw_proc(int write, str size_t count, loff_t *ppos) { int op; - struct proc_dir_entry *de; + struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); struct ctl_table *table; size_t res; - ssize_t error; - - de = PDE(file->f_dentry->d_inode); - if (!de || !de->data) - return -ENOTDIR; - table = (struct ctl_table *) de->data; - if (!table || !table->proc_handler) - return -ENOTDIR; - op = (write ? 002 : 004); - if (ctl_perm(table, op)) - return -EPERM; + ssize_t error = -ENOTDIR; - res = count; - - error = (*table->proc_handler) (table, write, file, buf, &res, ppos); - if (error) - return error; - return res; + spin_lock(&sysctl_lock); + if (de && de->data && use_table(de->set)) { + /* + * at that point we know that sysctl was not unregistered + * and won't be until we finish + */ + spin_unlock(&sysctl_lock); + table = (struct ctl_table *) de->data; + if (!table || !table->proc_handler) + goto out; + error = -EPERM; + op = (write ? 002 : 004); + if (ctl_perm(table, op)) + goto out; + + /* careful: calling conventions are nasty here */ + res = count; + error = (*table->proc_handler)(table, write, file, + buf, &res, ppos); + if (!error) + error = res; + out: + spin_lock(&sysctl_lock); + unuse_table(de->set); + } + spin_unlock(&sysctl_lock); + return error; } static int proc_opensys(struct inode *inode, struct file *file) Index: linux-2.6.14-ck4/Makefile =================================================================== --- linux-2.6.14-ck4.orig/Makefile 2005-11-09 20:58:02.000000000 +1100 +++ linux-2.6.14-ck4/Makefile 2005-11-09 20:58:29.000000000 +1100 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 14 -EXTRAVERSION = +EXTRAVERSION = .1 NAME=Affluent Albatross # *DOCUMENTATION*