RE: [1/3] Userspace probes prototype-take2


>>-----Original Message-----
>>From: systemtap-owner@sourceware.org [mailto:systemtap-owner@sourceware.org] On Behalf Of Prasanna S Panchamukhi
>>Sent: 2006年2月8日 22:11
>>To: systemtap@sources.redhat.com
>>Subject: [1/3] Userspace probes prototype-take2
>>
>>Here is take-2 on the user space probes prototype.
>>I have taken care of most of the comments from Yanmin Zhang.
>>Presently this patch set does not work with CONFIG_PREEMPT
>>enabled kernels. Next patch release will fix this.
>>
>>Thanks
>>Prasanna
>>+/**
>>+ * This routine looks for an existing uprobe at the given offset and inode.
>>+ * If it's found, returns the corresponding kprobe pointer.
>>+ */
>>+static struct kprobe __kprobes *get_kprobe_user(struct inode *inode,
>>+							unsigned long offset)
>>+{
>>+	struct hlist_head *head;
>>+	struct hlist_node *node;
>>+	struct kprobe *p, *kpr;
>>+	struct uprobe *uprobe;
>>+
>>+	head = &kprobe_table[hash_ptr((kprobe_opcode_t *)
>>+			(((unsigned long)inode) * offset), KPROBE_HASH_BITS)];
>>+
>>+	hlist_for_each_entry(p, node, head, hlist) {
[YM] Shouldn't hlist_for_each_entry be hlist_for_each_entry_rcu here?
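
For illustration, a minimal sketch of the loop with the RCU-safe iterator, the same
one get_kprobe() below already uses for kernel-text probes. This assumes, per the
comment above get_kprobe(), that callers hold kprobe_mutex or run with preemption
disabled, so the RCU read side is already satisfied:

	hlist_for_each_entry_rcu(p, node, head, hlist) {
		/* ... body unchanged from the quoted code ... */
	}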



>>+		if (kernel_text_address((unsigned long)p->addr))
>>+			continue;
>>+
>>+		if (p->pre_handler == aggr_pre_handler) {
>>+			kpr = list_entry(rcu_dereference(p)->list.next,
>>+							typeof(*kpr), list);
>>+			uprobe = container_of(kpr, struct uprobe, kp);
>>+		} else
>>+			uprobe = container_of(p, struct uprobe, kp);
>>+
>>+		if ((uprobe->inode == inode) && (uprobe->offset == offset))
>>+			return p;
>>+	}
>>+
>>+	return NULL;
>>+}
>>+
>>+/**
>>+ * Finds a uprobe at the specified user-space address in the current task.
>>+ * Points current_uprobe at that uprobe and returns the corresponding kprobe.
>>+ */
>>+static struct kprobe __kprobes *get_uprobe(void *addr)
>>+{
>>+	struct mm_struct *mm = current->mm;
>>+	struct vm_area_struct *vma;
>>+	struct inode *inode;
>>+	unsigned long offset;
>>+	struct kprobe *p, *kpr;
>>+	struct uprobe *uprobe;
>>+
>>+	down_read(&mm->mmap_sem);
>>+	vma = find_vma(mm, (unsigned long)addr);
>>+
>>+	BUG_ON(!vma);	/* this should not happen, not in our memory map */
>>+
>>+	offset = (unsigned long)addr - vma->vm_start +
>>+						(vma->vm_pgoff << PAGE_SHIFT);
>>+	if (!vma->vm_file) {
>>+		up_read(&mm->mmap_sem);
>>+		return NULL;
>>+	}
>>+
>>+	inode = vma->vm_file->f_dentry->d_inode;
>>+	up_read(&mm->mmap_sem);
>>+
>>+	p = get_kprobe_user(inode, offset);
>>+	if (!p)
>>+		return NULL;
>>+
>>+	if (p->pre_handler == aggr_pre_handler) {
>>+		kpr = list_entry(rcu_dereference(p)->list.next, typeof(*kpr),
>>+									list);
>>+		uprobe = container_of(kpr, struct uprobe, kp);
>>+	} else
>>+                uprobe = container_of(p, struct uprobe, kp);
>>+
>>+	if (uprobe)
>>+		 __get_cpu_var(current_uprobe) = uprobe;
>>+
>>+	return p;
>>+}
>>+
>>+/*
>>+ * This routine is called either:
>>+ *	- under the kprobe_mutex - during kprobe_[un]register()
>>+ *				OR
>>+ *	- with preemption disabled - from arch/xxx/kernel/kprobes.c
>>+ */
>>+struct kprobe __kprobes *get_kprobe(void *addr)
>>+{
>>+	struct hlist_head *head;
>>+	struct hlist_node *node;
>>+	struct kprobe *p;
>>+
>>+	if (!kernel_text_address((unsigned long)addr))
>>+		return get_uprobe(addr);
>>+
>>+	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
>>+	hlist_for_each_entry_rcu(p, node, head, hlist) {
>>+		if (p->addr == addr)
>>+			return p;
>>+	}
>>+	return NULL;
>>+}
>>+
>> /* Walks the list and increments nmissed count for multiprobe case */
>> void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
>> {
>>@@ -559,6 +643,342 @@ void __kprobes unregister_jprobe(struct
>> 	unregister_kprobe(&jp->kp);
>> }
>>
>>+typedef int (*process_uprobe_func_t)(struct uprobe *uprobe,
>>+				unsigned long *address, struct page *page);
>>+
>>+/**
>>+ * Adds the kprobe structure for the specified uprobe to either the
>>+ * kprobe_table or to the aggregate hash list for a given inode and offset.
>>+ * Also copies the instructions and inserts the breakpoint.
>>+ */
>>+int __kprobes insert_kprobe_user(struct uprobe *uprobe, unsigned long *address,
>>+							struct page *page)
>>+{
>>+	struct kprobe *old_p;
>>+	struct hlist_head *head;
>>+
>>+	uprobe->kp.nmissed = 0;
>>+	old_p = get_kprobe_user(uprobe->inode, uprobe->offset);
>>+	if (old_p)
>>+		return register_aggr_kprobe(old_p, &uprobe->kp);
>>+
>>+	head = &kprobe_table[hash_ptr((kprobe_opcode_t *)(uprobe->offset *
>>+			(unsigned long)uprobe->inode), KPROBE_HASH_BITS)];
>>+
>>+	INIT_HLIST_NODE(&uprobe->kp.hlist);
>>+	hlist_add_head_rcu(&uprobe->kp.hlist, head);
>>+
>>+	arch_copy_uprobe(&uprobe->kp, address);
>>+	arch_arm_uprobe(address);
>>+
>>+	return 0;
>>+}
>>+
>>+/**
>>+ * Wait for the page to be unlocked if someone else had locked it,
>>+ * then map the page and insert or remove the breakpoint.
>>+ */
>>+static int __kprobes map_uprobe_page(struct page *page, struct uprobe *uprobe,
>>+				     process_uprobe_func_t process_kprobe_user)
>>+{
>>+	int ret = 0;
>>+	unsigned long *uprobe_address;
>>+
>>+	if (!page)
>>+		return -EINVAL; /* TODO: more suitable errno */
>>+
>>+	wait_on_page_locked(page);
It does not look appropriate to use wait_on_page_locked here. Usually a thread locks the page
and calls readpage/readpages to deliver the I/O request to the lower layer, then calls wait_on_page_locked
to wait for the I/O to complete. Here, map_uprobe_page doesn't start readpage, so I suggest using lock_page directly.
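
For illustration, a rough sketch of how the body might read with lock_page() taken
up front instead of wait_on_page_locked(); it only reorders the quoted code, keeps
the existing TODOs, and has not been tested:

	if (!page)
		return -EINVAL;	/* TODO: more suitable errno */

	lock_page(page);	/* sleeps until any in-flight I/O completes */
	if (!PageUptodate(page)) {
		/* could probably retry readpage here */
		unlock_page(page);
		return -EINVAL;	/* TODO: more suitable errno */
	}

	uprobe_address = kmap(page);
	uprobe_address = (unsigned long *)((unsigned long)uprobe_address +
				(uprobe->offset & ~PAGE_MASK));
	ret = (*process_kprobe_user)(uprobe, uprobe_address, page);
	kunmap(page);
	unlock_page(page);

	return ret;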



>>+	/* could probably retry readpage here. */
>>+	if (!PageUptodate(page))
>>+		return -EINVAL; /* TODO: more suitable errno */
>>+
>>+	lock_page(page);
>>+
>>+	uprobe_address = kmap(page);
>>+	uprobe_address = (unsigned long *)((unsigned long)uprobe_address +
>>+				(unsigned long) (uprobe->offset & ~PAGE_MASK));
>>+	ret = (*process_kprobe_user)(uprobe, uprobe_address, page);
>>+	kunmap(page);
>>+
>>+	unlock_page(page);
>>+
>>+	return ret;
>>+}
>>+
>>+/**
>>+ * flush_vma walks through the list of process private mappings,
>>+ * gets the vma containing the offset and flushes all the vmas
>>+ * containing the probed page.
>>+ */
>>+static void __kprobes flush_vma(struct address_space *mapping,
>>+				struct page *page, struct uprobe *uprobe)
>>+{
>>+	struct vm_area_struct *vma = NULL;
>>+	struct prio_tree_iter iter;
>>+	struct prio_tree_root *head = &mapping->i_mmap;
>>+	struct mm_struct *mm;
>>+	unsigned long start, end, offset = uprobe->offset;
>>+
>>+	vma_prio_tree_foreach(vma, &iter, head, offset, offset) {
>>+		mm = vma->vm_mm;
>>+		down_read(&mm->mmap_sem);
>>+		spin_lock(&mapping->i_mmap_lock);
>>+
>>+
>>+		spin_lock(&mm->page_table_lock);
The locking here is confusing. I think mapping->i_mmap_lock is enough. Call spin_lock(&mm->page_table_lock)
before vma_prio_tree_foreach and do the spin_unlock outside of vma_prio_tree_foreach.
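
To make the suggestion concrete, here is one possible reading as a sketch: take
mapping->i_mmap_lock once around the whole prio-tree walk and drop the per-vma
locking. Whether that alone is sufficient for computing the vma bounds here is an
assumption, so treat this as illustrative only:

	static void __kprobes flush_vma(struct address_space *mapping,
					struct page *page, struct uprobe *uprobe)
	{
		struct vm_area_struct *vma;
		struct prio_tree_iter iter;
		unsigned long start, end, offset = uprobe->offset;

		spin_lock(&mapping->i_mmap_lock);
		vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, offset, offset) {
			start = vma->vm_start - (vma->vm_pgoff << PAGE_SHIFT);
			end = vma->vm_end - (vma->vm_pgoff << PAGE_SHIFT);
			if ((start + offset) < end)
				flush_icache_user_range(vma, page,
						(unsigned long)uprobe->kp.addr,
						sizeof(kprobe_opcode_t));
		}
		spin_unlock(&mapping->i_mmap_lock);
	}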



>>+		start = vma->vm_start - (vma->vm_pgoff << PAGE_SHIFT);
>>+		end = vma->vm_end - (vma->vm_pgoff << PAGE_SHIFT);
>>+		spin_unlock(&mm->page_table_lock);
>>+
>>+		if ((start + offset) < end)
>>+			flush_icache_user_range(vma, page,
>>+					(unsigned long)uprobe->kp.addr,
>>+						sizeof(kprobe_opcode_t));
>>+	spin_unlock(&mapping->i_mmap_lock);
>>+	up_read(&mm->mmap_sem);
>>+	}
>>+}
>>+
>>+/**
>>+ * Check if the given offset lies within the given page range.
>>+ */
>>+static inline int find_page_probe(unsigned long offset,
>>+						unsigned long page_start)
>>+{
Suggest deleting this function. The caller could simply use:
	if ((offset & PAGE_MASK) == (page_start & PAGE_MASK))



>>+	unsigned long page_end = page_start + PAGE_SIZE;
>>+
>>+	if ((offset >= page_start) && (offset < page_end))
>>+		return 1;
>>+
>>+	return 0;
>>+}
>>+
>>+/**
>>+ * Walk the uprobe_module_list and return the uprobe module with matching
>>+ * inode.
>>+ */
>>+static struct uprobe_module __kprobes *get_module_by_inode(struct inode *inode)
>>+{
>>+	struct uprobe_module *umodule;
>>+
>>+	list_for_each_entry(umodule, &uprobe_module_list, mlist) {
>>+		if (umodule->nd.dentry->d_inode == inode)
>>+			return umodule;
>>+	}
>>+
>>+	return NULL;
>>+}
>>+
>>+/**
>>+ * Gets exclusive write access to the given inode to ensure that the file
>>+ * on which probes are currently applied does not change. Use the function,
>>+ * deny_write_access_to_inode() we added in fs/namei.c.
>>+ */
>>+static inline int ex_write_lock(struct inode *inode)
>>+{
>>+	return deny_write_access_to_inode(inode);
>>+}
>>+
>>+/**
>>+ * Called when removing user space probes to release the write lock on the
>>+ * inode.
>>+ */
>>+static inline int ex_write_unlock(struct inode *inode)
>>+{
>>+	atomic_inc(&inode->i_writecount);
>>+	return 0;
>>+}
>>+
>>+/**
>>+ * Get the inode operations. This function leaves with the dentry held
>>+ * and with the inode write lock held, to ensure that the file on
>>+ * which probes are currently active does not change from under us. Adds the
>>+ * uprobe and uprobe_module to the appropriate hash lists. Also switches i_op
>>+ * to hook into readpage() and readpages().
>>+ */
>>+static void __kprobes get_inode_ops(struct uprobe *uprobe,
>>+				   struct uprobe_module *umodule)
>>+{
>>+	INIT_HLIST_HEAD(&umodule->ulist_head);
>>+	hlist_add_head(&uprobe->ulist, &umodule->ulist_head);
>>+	list_add(&umodule->mlist, &uprobe_module_list);
>>+}
>>+
>>+int __kprobes remove_kprobe_user(struct uprobe *uprobe, unsigned long *address,
>>+				struct page *page)
>>+{
>>+	struct kprobe *old_p, *list_p, *p;
>>+	int cleanup_p;
>>+
>>+	p = &uprobe->kp;
>>+	mutex_lock(&kprobe_mutex);
>>+	old_p = get_kprobe_user(uprobe->inode, uprobe->offset);
>>+	if (unlikely(!old_p)) {
>>+		mutex_unlock(&kprobe_mutex);
>>+		return 0;
>>+	}
>>+
>>+	if (p != old_p) {
>>+		list_for_each_entry_rcu(list_p, &old_p->list, list)
>>+			if (list_p == p)
>>+			/* kprobe p is a valid probe */
>>+				goto valid_p;
>>+		mutex_unlock(&kprobe_mutex);
>>+		return 0;
>>+	}
>>+
>>+valid_p:
>>+	if ((old_p == p) || ((old_p->pre_handler == aggr_pre_handler) &&
>>+		(p->list.next == &old_p->list) &&
>>+		(p->list.prev == &old_p->list))) {
>>+		/* Only probe on the hash list */
>>+		arch_disarm_uprobe(p, (kprobe_opcode_t *)address);
>>+		hlist_del_rcu(&old_p->hlist);
>>+		cleanup_p = 1;
>>+	} else {
>>+		list_del_rcu(&p->list);
>>+		cleanup_p = 0;
>>+	}
>>+
>>+	mutex_unlock(&kprobe_mutex);
>>+
>>+	synchronize_sched();
>>+	if (cleanup_p) {
>>+		if (p != old_p) {
>>+			list_del_rcu(&p->list);
>>+			kfree(old_p);
>>+		}
>>+	}
>>+
>>+	return 0;
>>+}
>>+
>>+/**
>>+ * unregister_uprobe: Disarms the probe, removes the kprobe and uprobe
>>+ * pointers from the hash lists. Unhooks readpage routines.
>>+ */
>>+void __kprobes unregister_uprobe(struct uprobe *uprobe)
>>+{
>>+	struct address_space *mapping;
>>+	struct uprobe_module *umodule;
>>+	struct page *page;
>>+	int ret = 0;
>>+
>>+	if (!uprobe->inode)
>>+		return;
>>+
>>+	mapping = uprobe->inode->i_mapping;
>>+
>>+	page = find_get_page(mapping, uprobe->offset >> PAGE_CACHE_SHIFT);
[YM] It's better to check for page == NULL here. If the page has already been dropped from the
page cache and find_get_page returns NULL, the map_uprobe_page and flush_vma calls below should be bypassed.
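
A minimal sketch of the suggested guard, reusing the names from the quoted code:

	page = find_get_page(mapping, uprobe->offset >> PAGE_CACHE_SHIFT);
	if (page) {
		ret = map_uprobe_page(page, uprobe, remove_kprobe_user);
		/*
		 * TODO: unregister_uprobe should not fail; need to handle
		 * the case where it does.
		 */
		flush_vma(mapping, page, uprobe);
		page_cache_release(page);
	}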


>>+
>>+	ret = map_uprobe_page(page, uprobe, remove_kprobe_user);
>>+	/*
>>+	 * TODO: unregister_uprobe should not fail, need to handle if it fails.
>>+	 */
>>+	flush_vma(mapping, page, uprobe);
>>+
>>+	if (page)
>>+		page_cache_release(page);
>>+
>>+	mutex_lock(&uprobe_mutex);
>>+	if (!(umodule = get_module_by_inode(uprobe->inode)))
>>+		goto out;
>>+
>>+	hlist_del(&uprobe->ulist);
>>+	if (hlist_empty(&umodule->ulist_head)) {
>>+		list_del(&umodule->mlist);
>>+		ex_write_unlock(uprobe->inode);
>>+		path_release(&umodule->nd);
>>+		kfree(umodule);
>>+	}
>>+out:
>>+	mutex_unlock(&uprobe_mutex);
>>+}
>>+
>>+/**
>>+ * register_uprobe(): combination of inode and offset is used to identify each
>>+ * probe uniquely. Each uprobe can be found from the kprobes_hash table by
>>+ * using the inode and offset. register_uprobe() inserts the breakpoint at the
>>+ * given address by locating and mapping the page. Returns 0 on success and
>>+ * an error on failure.
>>+ */
>>+int __kprobes register_uprobe(struct uprobe *uprobe)
>>+{
>>+	struct address_space *mapping;
>>+	struct uprobe_module *umodule = NULL;
>>+	struct inode *inode;
>>+	struct nameidata nd;
>>+	struct page *page;
>>+	int error = 0;
>>+
>>+	INIT_HLIST_NODE(&uprobe->ulist);
>>+
>>+	/*
>>+	 * TODO: Need to calculate the absolute file offset for dynamic
>>+	 * shared libraries.
>>+	 * uprobe->offset = (unsigned long)uprobe->kp.addr & UPROBE_OFFSET_MASK;
>>+	 */
>>+	if ((error = path_lookup(uprobe->pathname, LOOKUP_FOLLOW, &nd)))
>>+		return error;
>>+
>>+	inode = nd.dentry->d_inode;
>>+
>>+	error = ex_write_lock(inode);
>>+	if (error) {
>>+		path_release(&nd);
>>+		goto out;
>>+	}
>>+
>>+	mutex_lock(&uprobe_mutex);
>>+	/*
>>+	 * Check if there are probes already on this application and add the
>>+	 * corresponding uprobe to per application probe's list.
>>+	 */
>>+	umodule = get_module_by_inode(inode);
>>+	if (!umodule) {
>>+		/*
>>+		 * Allocate a uprobe_module structure for this application
>>+		 * if not allocated before.
>>+		 */
>>+		umodule = kzalloc(sizeof(struct uprobe_module), GFP_KERNEL);
>>+		if (!umodule) {
>>+			error = -ENOMEM;
>>+			path_release(&nd);
>>+			ex_write_unlock(inode);
>>+			goto out;
>>+		}
>>+		memcpy(&umodule->nd, &nd, sizeof(struct nameidata));
>>+		get_inode_ops(uprobe, umodule);
>>+	} else {
>>+		ex_write_unlock(inode);
>>+		hlist_add_head(&uprobe->ulist, &umodule->ulist_head);
>>+	}
>>+
>>+	uprobe->inode = inode;
>>+	mapping = inode->i_mapping;
>>+	mutex_lock(&kprobe_mutex);
>>+	page = find_get_page(mapping, (uprobe->offset >> PAGE_CACHE_SHIFT));
[YM] Same issue as in unregister_uprobe: check whether find_get_page returned NULL here.


>>+
>>+	/*
>>+	 * If error == -EINVAL, return success; probes will be inserted by
>>+	 * readpage hooks.
>>+	 * TODO: Use a more suitable errno?
>>+	 */
>>+	error = map_uprobe_page(page, uprobe, insert_kprobe_user);
>>+	if (error == -EINVAL)
>>+		error = 0;
>>+	flush_vma(mapping, page, uprobe);
>>+
>>+	if (page)
>>+		page_cache_release(page);
>>+
>>+	mutex_unlock(&kprobe_mutex);
>>+out:
>>+	mutex_unlock(&uprobe_mutex);
>>+
>>+	return error;
>>+}
>>+
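
For readers following along, here is a rough sketch of how a module might use the
two entry points exported below. The struct uprobe field names (pathname, offset,
the embedded kp) are taken from the hunks above, but the header location, the
pathname and offset values, and whether kp.addr must also be filled in are
assumptions, so treat this as illustrative rather than tested code:

	#include <linux/module.h>
	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/kprobes.h>	/* assumed to carry the uprobe declarations */

	/* ordinary kprobe-style pre-handler, assumed to apply to uprobes too */
	static int up_pre_handler(struct kprobe *p, struct pt_regs *regs)
	{
		printk(KERN_INFO "uprobe hit at %p\n", p->addr);
		return 0;
	}

	static struct uprobe up = {
		.pathname	= "/bin/ls",	/* hypothetical probed executable */
		.offset		= 0x1234,	/* hypothetical file offset of the probe point */
		.kp.pre_handler	= up_pre_handler,
		/* whether kp.addr (the user virtual address) must also be set
		 * is left open by the TODO in register_uprobe() above */
	};

	static int __init uprobe_example_init(void)
	{
		return register_uprobe(&up);
	}

	static void __exit uprobe_example_exit(void)
	{
		unregister_uprobe(&up);
	}

	module_init(uprobe_example_init);
	module_exit(uprobe_example_exit);
	MODULE_LICENSE("GPL");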
>> #ifdef ARCH_SUPPORTS_KRETPROBES
>>
>> /*
>>@@ -650,6 +1070,8 @@ static int __init init_kprobes(void)
>> 		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
>> 	}
>>
>>+	/* initialize uprobe_module_list */
>>+	INIT_LIST_HEAD(&uprobe_module_list);
>> 	err = arch_init_kprobes();
>> 	if (!err)
>> 		err = register_die_notifier(&kprobe_exceptions_nb);
>>@@ -666,4 +1088,5 @@ EXPORT_SYMBOL_GPL(unregister_jprobe);
>> EXPORT_SYMBOL_GPL(jprobe_return);
>> EXPORT_SYMBOL_GPL(register_kretprobe);
>> EXPORT_SYMBOL_GPL(unregister_kretprobe);
>>-
>>+EXPORT_SYMBOL_GPL(register_uprobe);
>>+EXPORT_SYMBOL_GPL(unregister_uprobe);
>>
>>_
>>--
>>Prasanna S Panchamukhi
>>Linux Technology Center
>>India Software Labs, IBM Bangalore
>>Email: prasanna@in.ibm.com
>>Ph: 91-80-51776329

